1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Code extracted from drivers/block/genhd.c 4 * Copyright (C) 1991-1998 Linus Torvalds 5 * Re-organised Feb 1998 Russell King 6 * 7 * We now have independent partition support from the 8 * block drivers, which allows all the partition code to 9 * be grouped in one location, and it to be mostly self 10 * contained. 11 */ 12 13 #include <linux/init.h> 14 #include <linux/module.h> 15 #include <linux/fs.h> 16 #include <linux/slab.h> 17 #include <linux/kmod.h> 18 #include <linux/ctype.h> 19 #include <linux/genhd.h> 20 #include <linux/blktrace_api.h> 21 22 #include "partitions/check.h" 23 24 #ifdef CONFIG_BLK_DEV_MD 25 extern void md_autodetect_dev(dev_t dev); 26 #endif 27 28 /* 29 * disk_name() is used by partition check code and the genhd driver. 30 * It formats the devicename of the indicated disk into 31 * the supplied buffer (of size at least 32), and returns 32 * a pointer to that same buffer (for convenience). 33 */ 34 35 char *disk_name(struct gendisk *hd, int partno, char *buf) 36 { 37 if (!partno) 38 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 39 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 40 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 41 else 42 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 43 44 return buf; 45 } 46 47 const char *bdevname(struct block_device *bdev, char *buf) 48 { 49 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 50 } 51 52 EXPORT_SYMBOL(bdevname); 53 54 const char *bio_devname(struct bio *bio, char *buf) 55 { 56 return disk_name(bio->bi_disk, bio->bi_partno, buf); 57 } 58 EXPORT_SYMBOL(bio_devname); 59 60 /* 61 * There's very little reason to use this, you should really 62 * have a struct block_device just about everywhere and use 63 * bdevname() instead. 64 */ 65 const char *__bdevname(dev_t dev, char *buffer) 66 { 67 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", 68 MAJOR(dev), MINOR(dev)); 69 return buffer; 70 } 71 72 EXPORT_SYMBOL(__bdevname); 73 74 static ssize_t part_partition_show(struct device *dev, 75 struct device_attribute *attr, char *buf) 76 { 77 struct hd_struct *p = dev_to_part(dev); 78 79 return sprintf(buf, "%d\n", p->partno); 80 } 81 82 static ssize_t part_start_show(struct device *dev, 83 struct device_attribute *attr, char *buf) 84 { 85 struct hd_struct *p = dev_to_part(dev); 86 87 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 88 } 89 90 ssize_t part_size_show(struct device *dev, 91 struct device_attribute *attr, char *buf) 92 { 93 struct hd_struct *p = dev_to_part(dev); 94 return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); 95 } 96 97 static ssize_t part_ro_show(struct device *dev, 98 struct device_attribute *attr, char *buf) 99 { 100 struct hd_struct *p = dev_to_part(dev); 101 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 102 } 103 104 static ssize_t part_alignment_offset_show(struct device *dev, 105 struct device_attribute *attr, char *buf) 106 { 107 struct hd_struct *p = dev_to_part(dev); 108 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 109 } 110 111 static ssize_t part_discard_alignment_show(struct device *dev, 112 struct device_attribute *attr, char *buf) 113 { 114 struct hd_struct *p = dev_to_part(dev); 115 return sprintf(buf, "%u\n", p->discard_alignment); 116 } 117 118 ssize_t part_stat_show(struct device *dev, 119 struct device_attribute *attr, char *buf) 120 { 121 struct hd_struct *p = dev_to_part(dev); 122 struct request_queue *q = part_to_disk(p)->queue; 123 unsigned int inflight; 124 125 inflight = part_in_flight(q, p); 126 return sprintf(buf, 127 "%8lu %8lu %8llu %8u " 128 "%8lu %8lu %8llu %8u " 129 "%8u %8u %8u " 130 "%8lu %8lu %8llu %8u" 131 "\n", 132 part_stat_read(p, ios[STAT_READ]), 133 part_stat_read(p, merges[STAT_READ]), 134 (unsigned long long)part_stat_read(p, sectors[STAT_READ]), 135 (unsigned int)part_stat_read_msecs(p, STAT_READ), 136 part_stat_read(p, ios[STAT_WRITE]), 137 part_stat_read(p, merges[STAT_WRITE]), 138 (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), 139 (unsigned int)part_stat_read_msecs(p, STAT_WRITE), 140 inflight, 141 jiffies_to_msecs(part_stat_read(p, io_ticks)), 142 jiffies_to_msecs(part_stat_read(p, time_in_queue)), 143 part_stat_read(p, ios[STAT_DISCARD]), 144 part_stat_read(p, merges[STAT_DISCARD]), 145 (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), 146 (unsigned int)part_stat_read_msecs(p, STAT_DISCARD)); 147 } 148 149 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, 150 char *buf) 151 { 152 struct hd_struct *p = dev_to_part(dev); 153 struct request_queue *q = part_to_disk(p)->queue; 154 unsigned int inflight[2]; 155 156 part_in_flight_rw(q, p, inflight); 157 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); 158 } 159 160 #ifdef CONFIG_FAIL_MAKE_REQUEST 161 ssize_t part_fail_show(struct device *dev, 162 struct device_attribute *attr, char *buf) 163 { 164 struct hd_struct *p = dev_to_part(dev); 165 166 return sprintf(buf, "%d\n", p->make_it_fail); 167 } 168 169 ssize_t part_fail_store(struct device *dev, 170 struct device_attribute *attr, 171 const char *buf, size_t count) 172 { 173 struct hd_struct *p = dev_to_part(dev); 174 int i; 175 176 if (count > 0 && sscanf(buf, "%d", &i) > 0) 177 p->make_it_fail = (i == 0) ? 0 : 1; 178 179 return count; 180 } 181 #endif 182 183 static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); 184 static DEVICE_ATTR(start, 0444, part_start_show, NULL); 185 static DEVICE_ATTR(size, 0444, part_size_show, NULL); 186 static DEVICE_ATTR(ro, 0444, part_ro_show, NULL); 187 static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); 188 static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); 189 static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); 190 static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); 191 #ifdef CONFIG_FAIL_MAKE_REQUEST 192 static struct device_attribute dev_attr_fail = 193 __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); 194 #endif 195 196 static struct attribute *part_attrs[] = { 197 &dev_attr_partition.attr, 198 &dev_attr_start.attr, 199 &dev_attr_size.attr, 200 &dev_attr_ro.attr, 201 &dev_attr_alignment_offset.attr, 202 &dev_attr_discard_alignment.attr, 203 &dev_attr_stat.attr, 204 &dev_attr_inflight.attr, 205 #ifdef CONFIG_FAIL_MAKE_REQUEST 206 &dev_attr_fail.attr, 207 #endif 208 NULL 209 }; 210 211 static struct attribute_group part_attr_group = { 212 .attrs = part_attrs, 213 }; 214 215 static const struct attribute_group *part_attr_groups[] = { 216 &part_attr_group, 217 #ifdef CONFIG_BLK_DEV_IO_TRACE 218 &blk_trace_attr_group, 219 #endif 220 NULL 221 }; 222 223 static void part_release(struct device *dev) 224 { 225 struct hd_struct *p = dev_to_part(dev); 226 blk_free_devt(dev->devt); 227 hd_free_part(p); 228 kfree(p); 229 } 230 231 static int part_uevent(struct device *dev, struct kobj_uevent_env *env) 232 { 233 struct hd_struct *part = dev_to_part(dev); 234 235 add_uevent_var(env, "PARTN=%u", part->partno); 236 if (part->info && part->info->volname[0]) 237 add_uevent_var(env, "PARTNAME=%s", part->info->volname); 238 return 0; 239 } 240 241 struct device_type part_type = { 242 .name = "partition", 243 .groups = part_attr_groups, 244 .release = part_release, 245 .uevent = part_uevent, 246 }; 247 248 static void delete_partition_work_fn(struct work_struct *work) 249 { 250 struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct, 251 rcu_work); 252 253 part->start_sect = 0; 254 part->nr_sects = 0; 255 part_stat_set_all(part, 0); 256 put_device(part_to_dev(part)); 257 } 258 259 void __delete_partition(struct percpu_ref *ref) 260 { 261 struct hd_struct *part = container_of(ref, struct hd_struct, ref); 262 INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn); 263 queue_rcu_work(system_wq, &part->rcu_work); 264 } 265 266 /* 267 * Must be called either with bd_mutex held, before a disk can be opened or 268 * after all disk users are gone. 269 */ 270 void delete_partition(struct gendisk *disk, int partno) 271 { 272 struct disk_part_tbl *ptbl = 273 rcu_dereference_protected(disk->part_tbl, 1); 274 struct hd_struct *part; 275 276 if (partno >= ptbl->len) 277 return; 278 279 part = rcu_dereference_protected(ptbl->part[partno], 1); 280 if (!part) 281 return; 282 283 rcu_assign_pointer(ptbl->part[partno], NULL); 284 rcu_assign_pointer(ptbl->last_lookup, NULL); 285 kobject_put(part->holder_dir); 286 device_del(part_to_dev(part)); 287 288 /* 289 * Remove gendisk pointer from idr so that it cannot be looked up 290 * while RCU period before freeing gendisk is running to prevent 291 * use-after-free issues. Note that the device number stays 292 * "in-use" until we really free the gendisk. 293 */ 294 blk_invalidate_devt(part_devt(part)); 295 hd_struct_kill(part); 296 } 297 298 static ssize_t whole_disk_show(struct device *dev, 299 struct device_attribute *attr, char *buf) 300 { 301 return 0; 302 } 303 static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); 304 305 /* 306 * Must be called either with bd_mutex held, before a disk can be opened or 307 * after all disk users are gone. 308 */ 309 struct hd_struct *add_partition(struct gendisk *disk, int partno, 310 sector_t start, sector_t len, int flags, 311 struct partition_meta_info *info) 312 { 313 struct hd_struct *p; 314 dev_t devt = MKDEV(0, 0); 315 struct device *ddev = disk_to_dev(disk); 316 struct device *pdev; 317 struct disk_part_tbl *ptbl; 318 const char *dname; 319 int err; 320 321 err = disk_expand_part_tbl(disk, partno); 322 if (err) 323 return ERR_PTR(err); 324 ptbl = rcu_dereference_protected(disk->part_tbl, 1); 325 326 if (ptbl->part[partno]) 327 return ERR_PTR(-EBUSY); 328 329 p = kzalloc(sizeof(*p), GFP_KERNEL); 330 if (!p) 331 return ERR_PTR(-EBUSY); 332 333 if (!init_part_stats(p)) { 334 err = -ENOMEM; 335 goto out_free; 336 } 337 338 seqcount_init(&p->nr_sects_seq); 339 pdev = part_to_dev(p); 340 341 p->start_sect = start; 342 p->alignment_offset = 343 queue_limit_alignment_offset(&disk->queue->limits, start); 344 p->discard_alignment = 345 queue_limit_discard_alignment(&disk->queue->limits, start); 346 p->nr_sects = len; 347 p->partno = partno; 348 p->policy = get_disk_ro(disk); 349 350 if (info) { 351 struct partition_meta_info *pinfo = alloc_part_info(disk); 352 if (!pinfo) { 353 err = -ENOMEM; 354 goto out_free_stats; 355 } 356 memcpy(pinfo, info, sizeof(*info)); 357 p->info = pinfo; 358 } 359 360 dname = dev_name(ddev); 361 if (isdigit(dname[strlen(dname) - 1])) 362 dev_set_name(pdev, "%sp%d", dname, partno); 363 else 364 dev_set_name(pdev, "%s%d", dname, partno); 365 366 device_initialize(pdev); 367 pdev->class = &block_class; 368 pdev->type = &part_type; 369 pdev->parent = ddev; 370 371 err = blk_alloc_devt(p, &devt); 372 if (err) 373 goto out_free_info; 374 pdev->devt = devt; 375 376 /* delay uevent until 'holders' subdir is created */ 377 dev_set_uevent_suppress(pdev, 1); 378 err = device_add(pdev); 379 if (err) 380 goto out_put; 381 382 err = -ENOMEM; 383 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 384 if (!p->holder_dir) 385 goto out_del; 386 387 dev_set_uevent_suppress(pdev, 0); 388 if (flags & ADDPART_FLAG_WHOLEDISK) { 389 err = device_create_file(pdev, &dev_attr_whole_disk); 390 if (err) 391 goto out_del; 392 } 393 394 err = hd_ref_init(p); 395 if (err) { 396 if (flags & ADDPART_FLAG_WHOLEDISK) 397 goto out_remove_file; 398 goto out_del; 399 } 400 401 /* everything is up and running, commence */ 402 rcu_assign_pointer(ptbl->part[partno], p); 403 404 /* suppress uevent if the disk suppresses it */ 405 if (!dev_get_uevent_suppress(ddev)) 406 kobject_uevent(&pdev->kobj, KOBJ_ADD); 407 return p; 408 409 out_free_info: 410 free_part_info(p); 411 out_free_stats: 412 free_part_stats(p); 413 out_free: 414 kfree(p); 415 return ERR_PTR(err); 416 out_remove_file: 417 device_remove_file(pdev, &dev_attr_whole_disk); 418 out_del: 419 kobject_put(p->holder_dir); 420 device_del(pdev); 421 out_put: 422 put_device(pdev); 423 return ERR_PTR(err); 424 } 425 426 static bool disk_unlock_native_capacity(struct gendisk *disk) 427 { 428 const struct block_device_operations *bdops = disk->fops; 429 430 if (bdops->unlock_native_capacity && 431 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { 432 printk(KERN_CONT "enabling native capacity\n"); 433 bdops->unlock_native_capacity(disk); 434 disk->flags |= GENHD_FL_NATIVE_CAPACITY; 435 return true; 436 } else { 437 printk(KERN_CONT "truncated\n"); 438 return false; 439 } 440 } 441 442 static int drop_partitions(struct gendisk *disk, struct block_device *bdev) 443 { 444 struct disk_part_iter piter; 445 struct hd_struct *part; 446 int res; 447 448 if (bdev->bd_part_count || bdev->bd_super) 449 return -EBUSY; 450 res = invalidate_partition(disk, 0); 451 if (res) 452 return res; 453 454 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 455 while ((part = disk_part_iter_next(&piter))) 456 delete_partition(disk, part->partno); 457 disk_part_iter_exit(&piter); 458 459 return 0; 460 } 461 462 static bool part_zone_aligned(struct gendisk *disk, 463 struct block_device *bdev, 464 sector_t from, sector_t size) 465 { 466 unsigned int zone_sectors = bdev_zone_sectors(bdev); 467 468 /* 469 * If this function is called, then the disk is a zoned block device 470 * (host-aware or host-managed). This can be detected even if the 471 * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not 472 * set). In this case, however, only host-aware devices will be seen 473 * as a block device is not created for host-managed devices. Without 474 * zoned block device support, host-aware drives can still be used as 475 * regular block devices (no zone operation) and their zone size will 476 * be reported as 0. Allow this case. 477 */ 478 if (!zone_sectors) 479 return true; 480 481 /* 482 * Check partition start and size alignement. If the drive has a 483 * smaller last runt zone, ignore it and allow the partition to 484 * use it. Check the zone size too: it should be a power of 2 number 485 * of sectors. 486 */ 487 if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) { 488 u32 rem; 489 490 div_u64_rem(from, zone_sectors, &rem); 491 if (rem) 492 return false; 493 if ((from + size) < get_capacity(disk)) { 494 div_u64_rem(size, zone_sectors, &rem); 495 if (rem) 496 return false; 497 } 498 499 } else { 500 501 if (from & (zone_sectors - 1)) 502 return false; 503 if ((from + size) < get_capacity(disk) && 504 (size & (zone_sectors - 1))) 505 return false; 506 507 } 508 509 return true; 510 } 511 512 int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 513 { 514 struct parsed_partitions *state = NULL; 515 struct hd_struct *part; 516 int p, highest, res; 517 rescan: 518 if (state && !IS_ERR(state)) { 519 free_partitions(state); 520 state = NULL; 521 } 522 523 res = drop_partitions(disk, bdev); 524 if (res) 525 return res; 526 527 if (disk->fops->revalidate_disk) 528 disk->fops->revalidate_disk(disk); 529 check_disk_size_change(disk, bdev, true); 530 bdev->bd_invalidated = 0; 531 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 532 return 0; 533 if (IS_ERR(state)) { 534 /* 535 * I/O error reading the partition table. If any 536 * partition code tried to read beyond EOD, retry 537 * after unlocking native capacity. 538 */ 539 if (PTR_ERR(state) == -ENOSPC) { 540 printk(KERN_WARNING "%s: partition table beyond EOD, ", 541 disk->disk_name); 542 if (disk_unlock_native_capacity(disk)) 543 goto rescan; 544 } 545 return -EIO; 546 } 547 /* 548 * If any partition code tried to read beyond EOD, try 549 * unlocking native capacity even if partition table is 550 * successfully read as we could be missing some partitions. 551 */ 552 if (state->access_beyond_eod) { 553 printk(KERN_WARNING 554 "%s: partition table partially beyond EOD, ", 555 disk->disk_name); 556 if (disk_unlock_native_capacity(disk)) 557 goto rescan; 558 } 559 560 /* tell userspace that the media / partition table may have changed */ 561 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 562 563 /* Detect the highest partition number and preallocate 564 * disk->part_tbl. This is an optimization and not strictly 565 * necessary. 566 */ 567 for (p = 1, highest = 0; p < state->limit; p++) 568 if (state->parts[p].size) 569 highest = p; 570 571 disk_expand_part_tbl(disk, highest); 572 573 /* add partitions */ 574 for (p = 1; p < state->limit; p++) { 575 sector_t size, from; 576 577 size = state->parts[p].size; 578 if (!size) 579 continue; 580 581 from = state->parts[p].from; 582 if (from >= get_capacity(disk)) { 583 printk(KERN_WARNING 584 "%s: p%d start %llu is beyond EOD, ", 585 disk->disk_name, p, (unsigned long long) from); 586 if (disk_unlock_native_capacity(disk)) 587 goto rescan; 588 continue; 589 } 590 591 if (from + size > get_capacity(disk)) { 592 printk(KERN_WARNING 593 "%s: p%d size %llu extends beyond EOD, ", 594 disk->disk_name, p, (unsigned long long) size); 595 596 if (disk_unlock_native_capacity(disk)) { 597 /* free state and restart */ 598 goto rescan; 599 } else { 600 /* 601 * we can not ignore partitions of broken tables 602 * created by for example camera firmware, but 603 * we limit them to the end of the disk to avoid 604 * creating invalid block devices 605 */ 606 size = get_capacity(disk) - from; 607 } 608 } 609 610 /* 611 * On a zoned block device, partitions should be aligned on the 612 * device zone size (i.e. zone boundary crossing not allowed). 613 * Otherwise, resetting the write pointer of the last zone of 614 * one partition may impact the following partition. 615 */ 616 if (bdev_is_zoned(bdev) && 617 !part_zone_aligned(disk, bdev, from, size)) { 618 printk(KERN_WARNING 619 "%s: p%d start %llu+%llu is not zone aligned\n", 620 disk->disk_name, p, (unsigned long long) from, 621 (unsigned long long) size); 622 continue; 623 } 624 625 part = add_partition(disk, p, from, size, 626 state->parts[p].flags, 627 &state->parts[p].info); 628 if (IS_ERR(part)) { 629 printk(KERN_ERR " %s: p%d could not be added: %ld\n", 630 disk->disk_name, p, -PTR_ERR(part)); 631 continue; 632 } 633 #ifdef CONFIG_BLK_DEV_MD 634 if (state->parts[p].flags & ADDPART_FLAG_RAID) 635 md_autodetect_dev(part_to_dev(part)->devt); 636 #endif 637 } 638 free_partitions(state); 639 return 0; 640 } 641 642 int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) 643 { 644 int res; 645 646 if (!bdev->bd_invalidated) 647 return 0; 648 649 res = drop_partitions(disk, bdev); 650 if (res) 651 return res; 652 653 set_capacity(disk, 0); 654 check_disk_size_change(disk, bdev, false); 655 bdev->bd_invalidated = 0; 656 /* tell userspace that the media / partition table may have changed */ 657 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 658 659 return 0; 660 } 661 662 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 663 { 664 struct address_space *mapping = bdev->bd_inode->i_mapping; 665 struct page *page; 666 667 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); 668 if (!IS_ERR(page)) { 669 if (PageError(page)) 670 goto fail; 671 p->v = page; 672 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9); 673 fail: 674 put_page(page); 675 } 676 p->v = NULL; 677 return NULL; 678 } 679 680 EXPORT_SYMBOL(read_dev_sector); 681
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.