Commit 4f664036 authored by Mikulas Patocka, committed by Greg Kroah-Hartman

dm sysfs: fix a module unload race

commit 2995fa78 upstream.

This reverts commit be35f486 ("dm: wait until embedded kobject is
released before destroying a device") and provides an improved fix.

The kobject release code that calls the completion must be placed in a
non-module file, otherwise there is a module unload race (if the process
calling dm_kobject_release is preempted and the DM module unloaded after
the completion is triggered, but before dm_kobject_release returns).

To fix this race, this patch moves the completion code to dm-builtin.c
which is always compiled directly into the kernel if BLK_DEV_DM is
selected.

The patch introduces a new dm_kobject_holder structure. Its purpose is
to keep the completion and kobject in one place, so that they can be
accessed from non-module code without the need to export the layout of
struct mapped_device to that code.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b77d1777
...@@ -176,8 +176,12 @@ config MD_FAULTY ...@@ -176,8 +176,12 @@ config MD_FAULTY
source "drivers/md/bcache/Kconfig" source "drivers/md/bcache/Kconfig"
config BLK_DEV_DM_BUILTIN
boolean
config BLK_DEV_DM config BLK_DEV_DM
tristate "Device mapper support" tristate "Device mapper support"
select BLK_DEV_DM_BUILTIN
---help--- ---help---
Device-mapper is a low level volume manager. It works by allowing Device-mapper is a low level volume manager. It works by allowing
people to specify mappings for ranges of logical sectors. Various people to specify mappings for ranges of logical sectors. Various
......
...@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o ...@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BCACHE) += bcache/
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
obj-$(CONFIG_DM_BUFIO) += dm-bufio.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
......
#include "dm.h"
/*
* The kobject release method must not be placed in the module itself,
* otherwise we are subject to module unload races.
*
* The release method is called when the last reference to the kobject is
* dropped. It may be called by any other kernel code that drops the last
* reference.
*
* The release method suffers from a module unload race. We may prevent the
* module from being unloaded at the start of the release method (using
* increased module reference count or synchronizing against the release
* method), however there is no way to prevent the module from being
* unloaded at the end of the release method.
*
* If this code were placed in the dm module, the following race may
* happen:
* 1. Some other process takes a reference to dm kobject
* 2. The user issues ioctl function to unload the dm device
* 3. dm_sysfs_exit calls kobject_put, however the object is not released
* because of the other reference taken at step 1
* 4. dm_sysfs_exit waits on the completion
* 5. The other process that took the reference in step 1 drops it,
* dm_kobject_release is called from this process
* 6. dm_kobject_release calls complete()
* 7. a reschedule happens before dm_kobject_release returns
* 8. dm_sysfs_exit continues, the dm device is unloaded, module reference
* count is decremented
* 9. The user unloads the dm module
* 10. The other process that was rescheduled in step 7 continues to run,
* it is now executing code in unloaded module, so it crashes
*
* Note that if the process that takes the foreign reference to dm kobject
* has a low priority and the system is sufficiently loaded with
* higher-priority processes that prevent the low-priority process from
* being scheduled long enough, this bug may really happen.
*
* In order to fix this module unload race, we place the release method
* into a helper code that is compiled directly into the kernel.
*/
/*
 * kobject release callback for device-mapper.
 *
 * Looks up the completion that is paired with @kobj and signals it.
 * This function lives in built-in code so that it can still return
 * safely if the dm module is unloaded right after the completion fires.
 */
void dm_kobject_release(struct kobject *kobj)
{
	struct completion *released = dm_get_completion_from_kobject(kobj);

	complete(released);
}
EXPORT_SYMBOL(dm_kobject_release);
...@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = { ...@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = {
.show = dm_attr_show, .show = dm_attr_show,
}; };
static void dm_kobject_release(struct kobject *kobj)
{
complete(dm_get_completion_from_kobject(kobj));
}
/* /*
* dm kobject is embedded in mapped_device structure * dm kobject is embedded in mapped_device structure
* no need to define release function here * no need to define release function here
......
...@@ -184,11 +184,8 @@ struct mapped_device { ...@@ -184,11 +184,8 @@ struct mapped_device {
/* forced geometry settings */ /* forced geometry settings */
struct hd_geometry geometry; struct hd_geometry geometry;
/* sysfs handle */ /* kobject and completion */
struct kobject kobj; struct dm_kobject_holder kobj_holder;
/* wait until the kobject is released */
struct completion kobj_completion;
/* zero-length flush that will be cloned and submitted to targets */ /* zero-length flush that will be cloned and submitted to targets */
struct bio flush_bio; struct bio flush_bio;
...@@ -1907,7 +1904,7 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1907,7 +1904,7 @@ static struct mapped_device *alloc_dev(int minor)
init_waitqueue_head(&md->wait); init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work); INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq); init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_completion); init_completion(&md->kobj_holder.completion);
md->disk->major = _major; md->disk->major = _major;
md->disk->first_minor = minor; md->disk->first_minor = minor;
...@@ -2739,20 +2736,14 @@ struct gendisk *dm_disk(struct mapped_device *md) ...@@ -2739,20 +2736,14 @@ struct gendisk *dm_disk(struct mapped_device *md)
struct kobject *dm_kobject(struct mapped_device *md) struct kobject *dm_kobject(struct mapped_device *md)
{ {
return &md->kobj; return &md->kobj_holder.kobj;
} }
/*
* struct mapped_device should not be exported outside of dm.c
* so use this check to verify that kobj is part of md structure
*/
struct mapped_device *dm_get_from_kobject(struct kobject *kobj) struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{ {
struct mapped_device *md; struct mapped_device *md;
md = container_of(kobj, struct mapped_device, kobj); md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
if (&md->kobj != kobj)
return NULL;
if (test_bit(DMF_FREEING, &md->flags) || if (test_bit(DMF_FREEING, &md->flags) ||
dm_deleting_md(md)) dm_deleting_md(md))
...@@ -2762,13 +2753,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) ...@@ -2762,13 +2753,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
return md; return md;
} }
struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
struct mapped_device *md = container_of(kobj, struct mapped_device, kobj);
return &md->kobj_completion;
}
int dm_suspended_md(struct mapped_device *md) int dm_suspended_md(struct mapped_device *md)
{ {
return test_bit(DMF_SUSPENDED, &md->flags); return test_bit(DMF_SUSPENDED, &md->flags);
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/kobject.h>
/* /*
* Suspend feature flags * Suspend feature flags
...@@ -126,11 +127,25 @@ void dm_interface_exit(void); ...@@ -126,11 +127,25 @@ void dm_interface_exit(void);
/* /*
* sysfs interface * sysfs interface
*/ */
/*
 * Keeps the kobject and its completion in one place so that non-module
 * code can reach the completion from the kobject without needing the
 * layout of struct mapped_device.
 */
struct dm_kobject_holder {
	struct kobject kobj;		/* sysfs handle */
	struct completion completion;	/* signalled from the kobject release method */
};
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
}
int dm_sysfs_init(struct mapped_device *md); int dm_sysfs_init(struct mapped_device *md);
void dm_sysfs_exit(struct mapped_device *md); void dm_sysfs_exit(struct mapped_device *md);
struct kobject *dm_kobject(struct mapped_device *md); struct kobject *dm_kobject(struct mapped_device *md);
struct mapped_device *dm_get_from_kobject(struct kobject *kobj); struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
struct completion *dm_get_completion_from_kobject(struct kobject *kobj);
/*
* The kobject helper
*/
void dm_kobject_release(struct kobject *kobj);
/* /*
* Targets for linear and striped mappings * Targets for linear and striped mappings
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment