Commit 3d0a8d10 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.2/drivers' of git://git.kernel.dk/linux-block

* 'for-3.2/drivers' of git://git.kernel.dk/linux-block: (30 commits)
  virtio-blk: use ida to allocate disk index
  hpsa: add small delay when using PCI Power Management to reset for kump
  cciss: add small delay when using PCI Power Management to reset for kump
  xen/blkback: Fix two races in the handling of barrier requests.
  xen/blkback: Check for proper operation.
  xen/blkback: Fix the inhibition to map pages when discarding sector ranges.
  xen/blkback: Report VBD_WSECT (wr_sect) properly.
  xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests.
  xen-blkfront: plug device number leak in xlblk_init() error path
  xen-blkfront: If no barrier or flush is supported, use invalid operation.
  xen-blkback: use kzalloc() in favor of kmalloc()+memset()
  xen-blkback: fixed indentation and comments
  xen-blkfront: fix a deadlock while handling discard response
  xen-blkfront: Handle discard requests.
  xen-blkback: Implement discard requests ('feature-discard')
  xen-blkfront: add BLKIF_OP_DISCARD and discard request struct
  drivers/block/loop.c: remove unnecessary bdev argument from loop_clr_fd()
  drivers/block/loop.c: emit uevent on auto release
  drivers/block/cpqarray.c: use pci_dev->revision
  loop: always allow userspace partitions and optionally support automatic scanning
  ...

Fic up trivial header file includsion conflict in drivers/block/loop.c
parents b4fdcb02 a0eda625
......@@ -71,3 +71,10 @@ Description: Value of 1 indicates the controller can honor the reset_devices
a dump device, as kdump requires resetting the device in order
to work reliably.
Where: /sys/bus/pci/devices/<dev>/ccissX/transport_mode
Date: July 2011
Kernel Version: 3.0
Contact: iss_storagedev@hp.com
Description: Value of "simple" indicates that the controller has been placed
in "simple mode". Value of "performant" indicates that the
controller has been placed in "performant mode".
......@@ -78,6 +78,16 @@ The device naming scheme is:
/dev/cciss/c1d1p2 Controller 1, disk 1, partition 2
/dev/cciss/c1d1p3 Controller 1, disk 1, partition 3
CCISS simple mode support
-------------------------
The "cciss_simple_mode=1" boot parameter may be used to prevent the driver
from putting the controller into "performant" mode. The difference is that
with simple mode, each command completion requires an interrupt, while with
"performant mode" (the default, and ordinarily better performing) it is
possible to have multiple command completions indicated by a single
interrupt.
SCSI tape drive and medium changer support
------------------------------------------
......
......@@ -537,7 +537,7 @@ void register_disk(struct gendisk *disk)
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
if (!disk_partitionable(disk))
if (!disk_part_scan_enabled(disk))
goto exit;
/* No such device (e.g., media were just removed) */
......@@ -848,7 +848,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
......
......@@ -101,7 +101,7 @@ static int blkdev_reread_part(struct block_device *bdev)
struct gendisk *disk = bdev->bd_disk;
int res;
if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
......
......@@ -68,6 +68,10 @@ static int cciss_tape_cmds = 6;
module_param(cciss_tape_cmds, int, 0644);
MODULE_PARM_DESC(cciss_tape_cmds,
"number of commands to allocate for tape devices (default: 6)");
static int cciss_simple_mode;
module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(cciss_simple_mode,
"Use 'simple mode' rather than 'performant mode'");
static DEFINE_MUTEX(cciss_mutex);
static struct proc_dir_entry *proc_cciss;
......@@ -176,6 +180,7 @@ static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
unsigned int block_size, InquiryData_struct *inq_buff,
drive_info_struct *drv);
static void __devinit cciss_interrupt_mode(ctlr_info_t *);
static int __devinit cciss_enter_simple_mode(struct ctlr_info *h);
static void start_io(ctlr_info_t *h);
static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
__u8 page_code, unsigned char scsi3addr[],
......@@ -388,7 +393,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
h->product_name,
(unsigned long)h->board_id,
h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
h->num_luns,
h->Qdepth, h->commands_outstanding,
h->maxQsinceinit, h->max_outstanding, h->maxSG);
......@@ -636,6 +641,18 @@ static ssize_t host_store_rescan(struct device *dev,
}
static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
static ssize_t host_show_transport_mode(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ctlr_info *h = to_hba(dev);
return snprintf(buf, 20, "%s\n",
h->transMethod & CFGTBL_Trans_Performant ?
"performant" : "simple");
}
static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
static ssize_t dev_show_unique_id(struct device *dev,
struct device_attribute *attr,
char *buf)
......@@ -808,6 +825,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
static struct attribute *cciss_host_attrs[] = {
&dev_attr_rescan.attr,
&dev_attr_resettable.attr,
&dev_attr_transport_mode.attr,
NULL
};
......@@ -3984,6 +4002,9 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
{
__u32 trans_support;
if (cciss_simple_mode)
return;
dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
/* Attempt to put controller into performant mode if supported */
/* Does board support performant mode? */
......@@ -4081,7 +4102,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
default_int_mode:
#endif /* CONFIG_PCI_MSI */
/* if we get here we're going to use the default interrupt mode */
h->intr[PERF_MODE_INT] = h->pdev->irq;
h->intr[h->intr_mode] = h->pdev->irq;
return;
}
......@@ -4341,6 +4362,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
}
cciss_enable_scsi_prefetch(h);
cciss_p600_dma_prefetch_quirk(h);
err = cciss_enter_simple_mode(h);
if (err)
goto err_out_free_res;
cciss_put_controller_into_performant_mode(h);
return 0;
......@@ -4533,6 +4557,13 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
pmcsr |= PCI_D0;
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
/*
* The P600 requires a small delay when changing states.
* Otherwise we may think the board did not reset and we bail.
* This for kdump only and is particular to the P600.
*/
msleep(500);
}
return 0;
}
......@@ -4843,20 +4874,20 @@ static int cciss_request_irq(ctlr_info_t *h,
irqreturn_t (*intxhandler)(int, void *))
{
if (h->msix_vector || h->msi_vector) {
if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
if (!request_irq(h->intr[h->intr_mode], msixhandler,
IRQF_DISABLED, h->devname, h))
return 0;
dev_err(&h->pdev->dev, "Unable to get msi irq %d"
" for %s\n", h->intr[PERF_MODE_INT],
" for %s\n", h->intr[h->intr_mode],
h->devname);
return -1;
}
if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
if (!request_irq(h->intr[h->intr_mode], intxhandler,
IRQF_DISABLED, h->devname, h))
return 0;
dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
h->intr[PERF_MODE_INT], h->devname);
h->intr[h->intr_mode], h->devname);
return -1;
}
......@@ -4887,7 +4918,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
{
int ctlr = h->ctlr;
free_irq(h->intr[PERF_MODE_INT], h);
free_irq(h->intr[h->intr_mode], h);
#ifdef CONFIG_PCI_MSI
if (h->msix_vector)
pci_disable_msix(h->pdev);
......@@ -4953,6 +4984,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
h = hba[i];
h->pdev = pdev;
h->busy_initializing = 1;
h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
INIT_LIST_HEAD(&h->cmpQ);
INIT_LIST_HEAD(&h->reqQ);
mutex_init(&h->busy_shutting_down);
......@@ -5009,7 +5041,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
h->devname, pdev->device, pci_name(pdev),
h->intr[PERF_MODE_INT], dac ? "" : " not");
h->intr[h->intr_mode], dac ? "" : " not");
if (cciss_allocate_cmd_pool(h))
goto clean4;
......@@ -5056,7 +5088,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
spin_lock_irqsave(&h->lock, flags);
h->access.set_intr_mask(h, CCISS_INTR_OFF);
spin_unlock_irqrestore(&h->lock, flags);
free_irq(h->intr[PERF_MODE_INT], h);
free_irq(h->intr[h->intr_mode], h);
rc = cciss_request_irq(h, cciss_msix_discard_completions,
cciss_intx_discard_completions);
if (rc) {
......@@ -5133,7 +5165,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
cciss_free_cmd_pool(h);
cciss_free_scatterlists(h);
cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
free_irq(h->intr[PERF_MODE_INT], h);
free_irq(h->intr[h->intr_mode], h);
clean2:
unregister_blkdev(h->major, h->devname);
clean1:
......@@ -5172,9 +5204,31 @@ static void cciss_shutdown(struct pci_dev *pdev)
if (return_code != IO_OK)
dev_warn(&h->pdev->dev, "Error flushing cache\n");
h->access.set_intr_mask(h, CCISS_INTR_OFF);
free_irq(h->intr[PERF_MODE_INT], h);
free_irq(h->intr[h->intr_mode], h);
}
static int __devinit cciss_enter_simple_mode(struct ctlr_info *h)
{
u32 trans_support;
trans_support = readl(&(h->cfgtable->TransportSupport));
if (!(trans_support & SIMPLE_MODE))
return -ENOTSUPP;
h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
cciss_wait_for_mode_change_ack(h);
print_cfg_table(h);
if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
return -ENODEV;
}
h->transMethod = CFGTBL_Trans_Simple;
return 0;
}
static void __devexit cciss_remove_one(struct pci_dev *pdev)
{
ctlr_info_t *h;
......
......@@ -92,6 +92,7 @@ struct ctlr_info
unsigned int intr[4];
unsigned int msix_vector;
unsigned int msi_vector;
int intr_mode;
int cciss_max_sectors;
BYTE cciss_read;
BYTE cciss_write;
......
......@@ -620,6 +620,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
}
vendor_id = pdev->vendor;
device_id = pdev->device;
revision = pdev->revision;
irq = pdev->irq;
for(i=0; i<6; i++)
......@@ -632,7 +633,6 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
}
pci_read_config_word(pdev, PCI_COMMAND, &command);
pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
......
......@@ -76,6 +76,8 @@
#include <linux/splice.h>
#include <linux/sysfs.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
static DEFINE_IDR(loop_index_idr);
......@@ -407,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
}
}
/*
* We use punch hole to reclaim the free space used by the
* image a.k.a. discard. However we do support discard if
* encryption is enabled, because it may give an attacker
* useful information.
*/
if (bio->bi_rw & REQ_DISCARD) {
struct file *file = lo->lo_backing_file;
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
if ((!file->f_op->fallocate) ||
lo->lo_encrypt_key_size) {
ret = -EOPNOTSUPP;
goto out;
}
ret = file->f_op->fallocate(file, mode, pos,
bio->bi_size);
if (unlikely(ret && ret != -EINVAL &&
ret != -EOPNOTSUPP))
ret = -EIO;
goto out;
}
ret = lo_send(lo, bio, pos);
if ((bio->bi_rw & REQ_FUA) && !ret) {
......@@ -622,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_putf;
fput(old_file);
if (max_part > 0)
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
ioctl_by_bdev(bdev, BLKRRPART, 0);
return 0;
......@@ -699,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
{
int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
return sprintf(buf, "%s\n", partscan ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
LOOP_ATTR_RO(partscan);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
&loop_attr_offset.attr,
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
&loop_attr_partscan.attr,
NULL,
};
......@@ -729,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
&loop_attribute_group);
}
static void loop_config_discard(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
/*
* We use punch hole to reclaim the free space used by the
* image a.k.a. discard. However we do support discard if
* encryption is enabled, because it may give an attacker
* useful information.
*/
if ((!file->f_op->fallocate) ||
lo->lo_encrypt_key_size) {
q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0;
q->limits.max_discard_sectors = 0;
q->limits.discard_zeroes_data = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
return;
}
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = inode->i_sb->s_blocksize;
q->limits.max_discard_sectors = UINT_MAX >> 9;
q->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
......@@ -829,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
}
lo->lo_state = Lo_bound;
wake_up_process(lo->lo_thread);
if (max_part > 0)
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
ioctl_by_bdev(bdev, BLKRRPART, 0);
return 0;
......@@ -890,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
return err;
}
static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
static int loop_clr_fd(struct loop_device *lo)
{
struct file *filp = lo->lo_backing_file;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
if (lo->lo_state != Lo_bound)
return -ENXIO;
......@@ -922,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
lo->lo_offset = 0;
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
lo->lo_flags = 0;
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
......@@ -940,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
if (max_part > 0 && bdev)
if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
ioctl_by_bdev(bdev, BLKRRPART, 0);
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
mutex_unlock(&lo->lo_ctl_mutex);
/*
* Need not hold lo_ctl_mutex to fput backing file.
......@@ -995,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (figure_loop_size(lo))
return -EFBIG;
}
loop_config_discard(lo);
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
......@@ -1010,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
(info->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
}
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
lo->lo_init[1] = info->lo_init[1];
......@@ -1203,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_CLR_FD:
/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
err = loop_clr_fd(lo, bdev);
err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
break;
......@@ -1423,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
err = loop_clr_fd(lo, NULL);
err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
} else {
......@@ -1545,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
if (!disk)
goto out_free_queue;
/*
* Disable partition scanning by default. The in-kernel partition
* scanning can be requested individually per-device during its
* setup. Userspace can always add and remove partitions from all
* devices. The needed partition minors are allocated from the
* extended minor space, the main loop device numbers will continue
* to match the loop minors, regardless of the number of partitions
* used.
*
* If max_part is given, partition scanning is globally enabled for
* all loop devices. The minors for the main loop devices will be
* multiples of max_part.
*
* Note: Global-for-all-devices, set-only-at-init, read-only module
* parameteters like 'max_loop' and 'max_part' make things needlessly
* complicated, are too static, inflexible and may surprise
* userspace tools. Parameters like this in general should be avoided.
*/
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
lo->lo_number = i;
lo->lo_thread = NULL;
......
......@@ -127,8 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
if (lock)
mutex_lock(&lo->tx_lock);
if (lo->sock) {
printk(KERN_WARNING "%s: shutting down socket\n",
lo->disk->disk_name);
dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
kernel_sock_shutdown(lo->sock, SHUT_RDWR);
lo->sock = NULL;
}
......@@ -158,8 +157,9 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
sigset_t blocked, oldset;
if (unlikely(!sock)) {
printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
lo->disk->disk_name, (send ? "send" : "recv"));
dev_err(disk_to_dev(lo->disk),
"Attempted %s on closed socket in sock_xmit\n",
(send ? "send" : "recv"));
return -EINVAL;
}
......@@ -250,8 +250,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
result = sock_xmit(lo, 1, &request, sizeof(request),
(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
if (result <= 0) {
printk(KERN_ERR "%s: Send control failed (result %d)\n",
lo->disk->disk_name, result);
dev_err(disk_to_dev(lo->disk),
"Send control failed (result %d)\n", result);
goto error_out;
}
......@@ -270,8 +270,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
lo->disk->disk_name, req, bvec->bv_len);
result = sock_send_bvec(lo, bvec, flags);
if (result <= 0) {
printk(KERN_ERR "%s: Send data failed (result %d)\n",
lo->disk->disk_name, result);
dev_err(disk_to_dev(lo->disk),
"Send data failed (result %d)\n",
result);
goto error_out;
}
}
......@@ -328,14 +329,13 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
reply.magic = 0;
result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
if (result <= 0) {
printk(KERN_ERR "%s: Receive control failed (result %d)\n",
lo->disk->disk_name, result);
dev_err(disk_to_dev(lo->disk),
"Receive control failed (result %d)\n", result);
goto harderror;
}
if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
lo->disk->disk_name,
dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
(unsigned long)ntohl(reply.magic));
result = -EPROTO;
goto harderror;
......@@ -347,15 +347,15 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
if (result != -ENOENT)
goto harderror;
printk(KERN_ERR "%s: Unexpected reply (%p)\n",
lo->disk->disk_name, reply.handle);
dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
reply.handle);
result = -EBADR;
goto harderror;
}
if (ntohl(reply.error)) {
printk(KERN_ERR "%s: Other side returned error (%d)\n",
lo->disk->disk_name, ntohl(reply.error));
dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
req->errors++;
return req;
}
......@@ -369,8 +369,8 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
rq_for_each_segment(bvec, req, iter) {
result = sock_recv_bvec(lo, bvec);
if (result <= 0) {
printk(KERN_ERR "%s: Receive data failed (result %d)\n",
lo->disk->disk_name, result);
dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
result);
req->errors++;
return req;
}
......@@ -405,10 +405,10 @@ static int nbd_do_it(struct nbd_device *lo)
BUG_ON(lo->magic != LO_MAGIC);
lo->pid = current->pid;
ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
lo->pid = task_pid_nr(current);
ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
if (ret) {
printk(KERN_ERR "nbd: sysfs_create_file failed!");
dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
lo->pid = 0;
return ret;
}
......@@ -416,7 +416,7 @@ static int nbd_do_it(struct nbd_device *lo)
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
device_remove_file(disk_to_dev(lo->disk), &pid_attr);
lo->pid = 0;
return 0;
}
......@@ -457,8 +457,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
if (rq_data_dir(req) == WRITE) {
nbd_cmd(req) = NBD_CMD_WRITE;
if (lo->flags & NBD_READ_ONLY) {
printk(KERN_ERR "%s: Write on read-only\n",
lo->disk->disk_name);
dev_err(disk_to_dev(lo->disk),
"Write on read-only\n");
goto error_out;
}
}
......@@ -468,16 +468,15 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
mutex_lock(&lo->tx_lock);
if (unlikely(!lo->sock)) {
mutex_unlock(&lo->tx_lock);
printk(KERN_ERR "%s: Attempted send on closed socket\n",
lo->disk->disk_name);
dev_err(disk_to_dev(lo->disk),
"Attempted send on closed socket\n");
goto error_out;
}
lo->active_req = req;
if (nbd_send_req(lo, req) != 0) {
printk(KERN_ERR "%s: Request send failed\n",
lo->disk->disk_name);
dev_err(disk_to_dev(lo->disk), "Request send failed\n");
req->errors++;
nbd_end_request(req);
} else {
......@@ -549,8 +548,8 @@ static void do_nbd_request(struct request_queue *q)
BUG_ON(lo->magic != LO_MAGIC);
if (unlikely(!lo->sock)) {
printk(KERN_ERR "%s: Attempted send on closed socket\n",
lo->disk->disk_name);
dev_err(disk_to_dev(lo->disk),
"Attempted send on closed socket\n");
req->errors++;
nbd_end_request(req);
spin_lock_irq(q->queue_lock);
......@@ -576,7 +575,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
case NBD_DISCONNECT: {
struct request sreq;
printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
blk_rq_init(NULL, &sreq);
sreq.cmd_type = REQ_TYPE_SPECIAL;
......@@ -674,7 +673,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
file = lo->file;
lo->file = NULL;
nbd_clear_que(lo);
printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
if (file)
fput(file);
lo->bytesize = 0;
......@@ -694,8 +693,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
return 0;
case NBD_PRINT_DEBUG:
printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
bdev->bd_disk->disk_name,
dev_info(disk_to_dev(lo->disk),
"next = %p, prev = %p, head = %p\n",
lo->queue_head.next, lo->queue_head.prev,
&lo->queue_head);
return 0;
......@@ -745,7 +744,7 @@ static int __init nbd_init(void)
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
if (max_part < 0) {
printk(KERN_CRIT "nbd: max_part must be >= 0\n");
printk(KERN_ERR "nbd: max_part must be >= 0\n");
return -EINVAL;
}
......
......@@ -39,6 +39,9 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/loop.h>
#include <linux/falloc.h>
#include <linux/fs.h>
#include <xen/events.h>
#include <xen/page.h>
......@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
static void print_stats(struct xen_blkif *blkif)
{
pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
" | ds %4d\n",
current->comm, blkif->st_oo_req,
blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
blkif->st_rd_req, blkif->st_wr_req,
blkif->st_f_req, blkif->st_ds_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
blkif->st_oo_req = 0;
blkif->st_ds_req = 0;
}
int xen_blkif_schedule(void *arg)
......@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
return ret;
}
static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
{
int err = 0;
int status = BLKIF_RSP_OKAY;
struct block_device *bdev = blkif->vbd.bdev;
if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
/* just forward the discard request */
err = blkdev_issue_discard(bdev,
req->u.discard.sector_number,
req->u.discard.nr_sectors,
GFP_KERNEL, 0);
else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
/* punch a hole in the backing file */
struct loop_device *lo = bdev->bd_disk->private_data;
struct file *file = lo->lo_backing_file;
if (file->f_op->fallocate)
err = file->f_op->fallocate(file,
FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
req->u.discard.sector_number << 9,
req->u.discard.nr_sectors << 9);
else
err = -EOPNOTSUPP;
} else
err = -EOPNOTSUPP;
if (err == -EOPNOTSUPP) {
pr_debug(DRV_PFX "discard op failed, not supported\n");
status = BLKIF_RSP_EOPNOTSUPP;
} else if (err)
status = BLKIF_RSP_ERROR;
make_response(blkif, req->id, req->operation, status);
}
static void xen_blk_drain_io(struct xen_blkif *blkif)
{
atomic_set(&blkif->drain, 1);
do {
/* The initial value is one, and one refcnt taken at the
* start of the xen_blkif_schedule thread. */
if (atomic_read(&blkif->refcnt) <= 2)
break;
wait_for_completion_interruptible_timeout(
&blkif->drain_complete, HZ);
if (!atomic_read(&blkif->drain))
break;
} while (!kthread_should_stop());
atomic_set(&blkif->drain, 0);
}
/*
* Completion callback on the bio's. Called as bh->b_end_io()
*/
......@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
(error == -EOPNOTSUPP)) {
pr_debug(DRV_PFX "write barrier op failed, not supported\n");
xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if (error) {
pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
" error=%d\n", error);
......@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
xen_blkif_put(pending_req->blkif);
if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
if (atomic_read(&pending_req->blkif->drain))
complete(&pending_req->blkif->drain_complete);
}
free_req(pending_req);
}
}
......@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
return more_to_do;
}
/*
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
* and call the 'submit_bio' to pass it to the underlying storage.
......@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
int i, nbio = 0;
int operation;
struct blk_plug plug;
bool drain = false;
switch (req->operation) {
case BLKIF_OP_READ:
......@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
blkif->st_wr_req++;
operation = WRITE_ODIRECT;
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
case BLKIF_OP_FLUSH_DISKCACHE:
blkif->st_f_req++;
operation = WRITE_FLUSH;
break;
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_DISCARD:
blkif->st_ds_req++;
operation = REQ_DISCARD;
break;
default:
operation = 0; /* make gcc happy */
goto fail_response;
......@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
/* Check that the number of segments is sane. */
nseg = req->nr_segments;
if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
operation != REQ_DISCARD) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
nseg);
......@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
}
}
/* Wait on all outstanding I/O's and once that has been completed
* issue the WRITE_FLUSH.
*/
if (drain)
xen_blk_drain_io(pending_req->blkif);
/*
* If we have failed at this point, we need to undo the M2P override,
* set gnttab_set_unmap_op on all of the grant references and perform
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
if (xen_blkbk_map(req, pending_req, seg))
if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
goto fail_flush;
/* This corresponding xen_blkif_put is done in __end_block_io_op */
/*
* This corresponding xen_blkif_put is done in __end_block_io_op, or
* below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
*/
xen_blkif_get(blkif);
for (i = 0; i < nseg; i++) {
......@@ -654,10 +737,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
preq.sector_number += seg[i].nsec;
}
/* This will be hit if the operation was a flush. */
/* This will be hit if the operation was a flush or discard. */
if (!bio) {
BUG_ON(operation != WRITE_FLUSH);
BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
if (operation == WRITE_FLUSH) {
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
......@@ -666,6 +750,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
} else if (operation == REQ_DISCARD) {
xen_blk_discard(blkif, req);
xen_blkif_put(blkif);
free_req(pending_req);
return 0;
}
}
/*
......@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
if (operation == READ)
blkif->st_rd_sect += preq.nr_sects;
else if (operation == WRITE || operation == WRITE_FLUSH)
else if (operation & WRITE)
blkif->st_wr_sect += preq.nr_sects;
return 0;
......@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
xen_blkif_reqs, GFP_KERNEL);
blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
mmap_pages, GFP_KERNEL);
......@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
if (rc)
goto failed_init;
memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
INIT_LIST_HEAD(&blkbk->pending_free);
spin_lock_init(&blkbk->pending_free_lock);
init_waitqueue_head(&blkbk->pending_free_wq);
......
......@@ -62,13 +62,26 @@ struct blkif_common_response {
/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_request_rw {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_request_discard {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
union {
struct blkif_x86_32_request_rw rw;
struct blkif_x86_32_request_discard discard;
} u;
};
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
......@@ -78,13 +91,26 @@ struct blkif_x86_32_response {
#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_64_request_discard {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
union {
struct blkif_x86_64_request_rw rw;
struct blkif_x86_64_request_discard discard;
} u;
};
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
......@@ -112,6 +138,11 @@ enum blkif_protocol {
BLKIF_PROTOCOL_X86_64 = 3,
};
enum blkif_backend_type {
BLKIF_BACKEND_PHY = 1,
BLKIF_BACKEND_FILE = 2,
};
struct xen_vbd {
/* What the domain refers to this vbd as. */
blkif_vdev_t handle;
......@@ -137,6 +168,7 @@ struct xen_blkif {
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
enum blkif_backend_type blk_backend_type;
union blkif_back_rings blk_rings;
struct vm_struct *blk_ring_area;
/* The VBD attached to this interface. */
......@@ -148,6 +180,9 @@ struct xen_blkif {
atomic_t refcnt;
wait_queue_head_t wq;
/* for barrier (drain) requests */
struct completion drain_complete;
atomic_t drain;
/* One thread per one blkif. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
......@@ -158,6 +193,7 @@ struct xen_blkif {
int st_wr_req;
int st_oo_req;
int st_f_req;
int st_ds_req;
int st_rd_sect;
int st_wr_sect;
......@@ -181,7 +217,7 @@ struct xen_blkif {
struct phys_req {
unsigned short dev;
unsigned short nr_sects;
blkif_sector_t nr_sects;
struct block_device *bdev;
blkif_sector_t sector_number;
};
......@@ -195,6 +231,8 @@ int xen_blkif_schedule(void *arg);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
......@@ -205,12 +243,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
dst->u.rw.sector_number = src->sector_number;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->seg[i];
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
default:
break;
}
}
static inline void blkif_get_x86_64_req(struct blkif_request *dst,
......@@ -221,12 +272,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
dst->u.rw.sector_number = src->sector_number;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
if (n > dst->nr_segments)
n = dst->nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->seg[i];
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
default:
break;
}
}
#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
......@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
init_waitqueue_head(&blkif->wq);
init_completion(&blkif->drain_complete);
atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
......@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
......@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
&dev_attr_f_req.attr,
&dev_attr_ds_req.attr,
&dev_attr_rd_sect.attr,
&dev_attr_wr_sect.attr,
NULL
......@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
return err;
}
int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
struct xen_blkif *blkif = be->blkif;
char *type;
int err;
int state = 0;
type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
if (!IS_ERR(type)) {
if (strncmp(type, "file", 4) == 0) {
state = 1;
blkif->blk_backend_type = BLKIF_BACKEND_FILE;
}
if (strncmp(type, "phy", 3) == 0) {
struct block_device *bdev = be->blkif->vbd.bdev;
struct request_queue *q = bdev_get_queue(bdev);
if (blk_queue_discard(q)) {
err = xenbus_printf(xbt, dev->nodename,
"discard-granularity", "%u",
q->limits.discard_granularity);
if (err) {
xenbus_dev_fatal(dev, err,
"writing discard-granularity");
goto kfree;
}
err = xenbus_printf(xbt, dev->nodename,
"discard-alignment", "%u",
q->limits.discard_alignment);
if (err) {
xenbus_dev_fatal(dev, err,
"writing discard-alignment");
goto kfree;
}
state = 1;
blkif->blk_backend_type = BLKIF_BACKEND_PHY;
}
}
} else {
err = PTR_ERR(type);
xenbus_dev_fatal(dev, err, "reading type");
goto out;
}
err = xenbus_printf(xbt, dev->nodename, "feature-discard",
"%d", state);
if (err)
xenbus_dev_fatal(dev, err, "writing feature-discard");
kfree:
kfree(type);
out:
return err;
}
int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state)
{
struct xenbus_device *dev = be->dev;
int err;
err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
"%d", state);
if (err)
xenbus_dev_fatal(dev, err, "writing feature-barrier");
return err;
}
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures, and watch the store waiting for the hotplug scripts to tell us
......@@ -650,6 +721,11 @@ static void connect(struct backend_info *be)
if (err)
goto abort;
err = xen_blkbk_discard(xbt, be);
/* If we can't advertise it is OK. */
err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));
if (err) {
......
......@@ -98,6 +98,9 @@ struct blkfront_info
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
unsigned int feature_discard;
unsigned int discard_granularity;
unsigned int discard_alignment;
int is_ready;
};
......@@ -302,6 +305,12 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = info->flush_op;
}
if (unlikely(req->cmd_flags & REQ_DISCARD)) {
/* id, sector_number and handle are set above. */
ring_req->operation = BLKIF_OP_DISCARD;
ring_req->nr_segments = 0;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
} else {
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
......@@ -317,7 +326,7 @@ static int blkif_queue_request(struct request *req)
ref,
info->xbdev->otherend_id,
buffer_mfn,
rq_data_dir(req) );
rq_data_dir(req));
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
ring_req->u.rw.seg[i] =
......@@ -326,6 +335,7 @@ static int blkif_queue_request(struct request *req)
.first_sect = fsect,
.last_sect = lsect };
}
}
info->ring.req_prod_pvt++;
......@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
blk_start_request(req);
if (req->cmd_type != REQ_TYPE_FS) {
if ((req->cmd_type != REQ_TYPE_FS) ||
((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
!info->flush_op)) {
__blk_end_request_all(req, -EIO);
continue;
}
......@@ -399,6 +411,7 @@ static void do_blkif_request(struct request_queue *rq)
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
struct request_queue *rq;
struct blkfront_info *info = gd->private_data;
rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
if (rq == NULL)
......@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
if (info->feature_discard) {
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity;
rq->limits.discard_alignment = info->discard_alignment;
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_logical_block_size(rq, sector_size);
blk_queue_max_hw_sectors(rq, 512);
......@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
printk(KERN_WARNING "blkfront: %s: discard op failed\n",
info->gd->disk_name);
error = -EOPNOTSUPP;
info->feature_discard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
}
__blk_end_request_all(req, error);
break;
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
......@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
bdput(bdev);
}
static void blkfront_setup_discard(struct blkfront_info *info)
{
int err;
char *type;
unsigned int discard_granularity;
unsigned int discard_alignment;
type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
if (IS_ERR(type))
return;
if (strncmp(type, "phy", 3) == 0) {
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
"discard-alignment", "%u", &discard_alignment,
NULL);
if (!err) {
info->feature_discard = 1;
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
} else if (strncmp(type, "file", 4) == 0)
info->feature_discard = 1;
kfree(type);
}
/*
* Invoked when the backend is finally 'ready' (and has told produced
* the details about the physical device - #sectors, size, etc).
......@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int binfo;
int err;
int barrier, flush;
int barrier, flush, discard;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
......@@ -1179,6 +1237,13 @@ static void blkfront_connect(struct blkfront_info *info)
info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"feature-discard", "%d", &discard,
NULL);
if (!err && discard)
blkfront_setup_discard(info);
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
......@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
static int __init xlblk_init(void)
{
int ret;
if (!xen_domain())
return -ENODEV;
......@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
return -ENODEV;
}
return xenbus_register_frontend(&blkfront);
ret = xenbus_register_frontend(&blkfront);
if (ret) {
unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
return ret;
}
return 0;
}
module_init(xlblk_init);
......
......@@ -3300,6 +3300,13 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
pmcsr |= PCI_D0;
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
/*
* The P600 requires a small delay when changing states.
* Otherwise we may think the board did not reset and we bail.
* This for kdump only and is particular to the P600.
*/
msleep(500);
}
return 0;
}
......
......@@ -971,7 +971,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
if (!bdev->bd_disk)
return;
if (disk_partitionable(bdev->bd_disk))
if (disk_part_scan_enabled(bdev->bd_disk))
bdev->bd_invalidated = 1;
}
......
......@@ -131,6 +131,7 @@ struct hd_struct {
#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
#define GENHD_FL_NATIVE_CAPACITY 128
#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256
#define GENHD_FL_NO_PART_SCAN 512
enum {
DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
......@@ -238,9 +239,10 @@ static inline int disk_max_parts(struct gendisk *disk)
return disk->minors;
}
static inline bool disk_partitionable(struct gendisk *disk)
static inline bool disk_part_scan_enabled(struct gendisk *disk)
{
return disk_max_parts(disk) > 1;
return disk_max_parts(disk) > 1 &&
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
static inline dev_t disk_devt(struct gendisk *disk)
......
......@@ -74,6 +74,7 @@ struct loop_device {
enum {
LO_FLAGS_READ_ONLY = 1,
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
......
......@@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t;
* "feature-flush-cache" node!
*/
#define BLKIF_OP_FLUSH_DISKCACHE 3
/*
* Recognised only if "feature-discard" is present in backend xenbus info.
* The "feature-discard" node contains a boolean indicating whether trim
* (ATA) or unmap (SCSI) - conviently called discard requests are likely
* to succeed or fail. Either way, a discard request
* may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
* the underlying block-device hardware. The boolean simply indicates whether
* or not it is worthwhile for the frontend to attempt discard requests.
* If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
* create the "feature-discard" node!
*
* Discard operation is a request for the underlying block device to mark
* extents to be erased. However, discard does not guarantee that the blocks
* will be erased from the device - it is just a hint to the device
* controller that these blocks are no longer in use. What the device
* controller does with that information is left to the controller.
* Discard operations are passed with sector_number as the
* sector index to begin discard operations at and nr_sectors as the number of
* sectors to be discarded. The specified sectors should be discarded if the
* underlying block device supports trim (ATA) or unmap (SCSI) operations,
* or a BLKIF_RSP_EOPNOTSUPP should be returned.
* More information about trim/unmap operations at:
* http://t13.org/Documents/UploadedDocuments/docs2008/
* e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
* http://www.seagate.com/staticfiles/support/disc/manuals/
* Interface%20manuals/100293068c.pdf
*/
#define BLKIF_OP_DISCARD 5
/*
* Maximum scatter/gather segments per request.
* This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
......@@ -74,6 +104,11 @@ struct blkif_request_rw {
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_request_discard {
blkif_sector_t sector_number;
uint64_t nr_sectors;
};
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
......@@ -81,6 +116,7 @@ struct blkif_request {
uint64_t id; /* private guest value, echoed in resp */
union {
struct blkif_request_rw rw;
struct blkif_request_discard discard;
} u;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment