Commit 35134319 authored by Linus Torvalds

Merge tag 'fsnotify_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:
 "fsnotify speedups when notification actually isn't used and support
  for identifying processes which caused fanotify events through pidfd
  instead of normal pid"

* tag 'fsnotify_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: optimize the case of no marks of any type
  fsnotify: count all objects with attached connectors
  fsnotify: count s_fsnotify_inode_refs for attached connectors
  fsnotify: replace igrab() with ihold() on attach connector
  fanotify: add pidfd support to the fanotify API
  fanotify: introduce a generic info record copying helper
  fanotify: minor cosmetic adjustments to fid labels
  kernel/pid.c: implement additional checks upon pidfd_create() parameters
  kernel/pid.c: remove static qualifier from pidfd_create()
parents 2287a51b e43de7f0
This diff is collapsed.
...@@ -87,15 +87,15 @@ static void fsnotify_unmount_inodes(struct super_block *sb) ...@@ -87,15 +87,15 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode) if (iput_inode)
iput(iput_inode); iput(iput_inode);
/* Wait for outstanding inode references from connectors */
wait_var_event(&sb->s_fsnotify_inode_refs,
!atomic_long_read(&sb->s_fsnotify_inode_refs));
} }
void fsnotify_sb_delete(struct super_block *sb) void fsnotify_sb_delete(struct super_block *sb)
{ {
fsnotify_unmount_inodes(sb); fsnotify_unmount_inodes(sb);
fsnotify_clear_marks_by_sb(sb); fsnotify_clear_marks_by_sb(sb);
/* Wait for outstanding object references from connectors */
wait_var_event(&sb->s_fsnotify_connectors,
!atomic_long_read(&sb->s_fsnotify_connectors));
} }
/* /*
......
...@@ -27,6 +27,21 @@ static inline struct super_block *fsnotify_conn_sb( ...@@ -27,6 +27,21 @@ static inline struct super_block *fsnotify_conn_sb(
return container_of(conn->obj, struct super_block, s_fsnotify_marks); return container_of(conn->obj, struct super_block, s_fsnotify_marks);
} }
/*
 * Resolve the super_block that a mark connector's object belongs to.
 *
 * Inode connectors go through the inode's i_sb, mount connectors through
 * the mount's mnt.mnt_sb, and superblock connectors resolve to the
 * superblock they are attached to.  Any other connector type gives NULL.
 */
static inline struct super_block *fsnotify_connector_sb(
				struct fsnotify_mark_connector *conn)
{
	/* Read the type once, as the switch-based form would. */
	const unsigned int obj_type = conn->type;

	if (obj_type == FSNOTIFY_OBJ_TYPE_INODE)
		return fsnotify_conn_inode(conn)->i_sb;

	if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
		return fsnotify_conn_mount(conn)->mnt.mnt_sb;

	if (obj_type == FSNOTIFY_OBJ_TYPE_SB)
		return fsnotify_conn_sb(conn);

	return NULL;
}
/* destroy all events sitting in this groups notification queue */ /* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group); extern void fsnotify_flush_notify(struct fsnotify_group *group);
......
...@@ -169,6 +169,37 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) ...@@ -169,6 +169,37 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
} }
} }
static void fsnotify_get_inode_ref(struct inode *inode)
{
ihold(inode);
atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
}
/*
 * Drop the inode reference and the superblock accounting taken by
 * fsnotify_get_inode_ref(), waking up waiters on the superblock's
 * s_fsnotify_connectors counter when it reaches zero.
 */
static void fsnotify_put_inode_ref(struct inode *inode)
{
	/*
	 * Look up the superblock before iput(): presumably the inode must
	 * not be dereferenced once our reference has been dropped.
	 */
	struct super_block *owner = inode->i_sb;

	iput(inode);

	if (!atomic_long_dec_and_test(&owner->s_fsnotify_connectors))
		return;

	wake_up_var(&owner->s_fsnotify_connectors);
}
/*
 * Account @conn in the s_fsnotify_connectors counter of the superblock its
 * object belongs to.  Does nothing when no superblock can be resolved for
 * the connector.
 */
static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
{
	struct super_block *owner = fsnotify_connector_sb(conn);

	if (!owner)
		return;

	atomic_long_inc(&owner->s_fsnotify_connectors);
}
/*
 * Undo fsnotify_get_sb_connectors(): decrement the superblock's
 * s_fsnotify_connectors counter and wake up waiters on it when it reaches
 * zero.  Does nothing when no superblock can be resolved for the connector.
 */
static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
{
	struct super_block *owner = fsnotify_connector_sb(conn);

	if (!owner)
		return;

	if (atomic_long_dec_and_test(&owner->s_fsnotify_connectors))
		wake_up_var(&owner->s_fsnotify_connectors);
}
static void *fsnotify_detach_connector_from_object( static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn, struct fsnotify_mark_connector *conn,
unsigned int *type) unsigned int *type)
...@@ -182,13 +213,13 @@ static void *fsnotify_detach_connector_from_object( ...@@ -182,13 +213,13 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn); inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0; inode->i_fsnotify_mask = 0;
atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
} }
fsnotify_put_sb_connectors(conn);
rcu_assign_pointer(*(conn->obj), NULL); rcu_assign_pointer(*(conn->obj), NULL);
conn->obj = NULL; conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
...@@ -209,19 +240,12 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) ...@@ -209,19 +240,12 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
/* Drop object reference originally held by a connector */ /* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp) static void fsnotify_drop_object(unsigned int type, void *objp)
{ {
struct inode *inode;
struct super_block *sb;
if (!objp) if (!objp)
return; return;
/* Currently only inode references are passed to be dropped */ /* Currently only inode references are passed to be dropped */
if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
return; return;
inode = objp; fsnotify_put_inode_ref(objp);
sb = inode->i_sb;
iput(inode);
if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
wake_up_var(&sb->s_fsnotify_inode_refs);
} }
void fsnotify_put_mark(struct fsnotify_mark *mark) void fsnotify_put_mark(struct fsnotify_mark *mark)
...@@ -493,8 +517,12 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -493,8 +517,12 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->fsid.val[0] = conn->fsid.val[1] = 0; conn->fsid.val[0] = conn->fsid.val[1] = 0;
conn->flags = 0; conn->flags = 0;
} }
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = igrab(fsnotify_conn_inode(conn)); inode = fsnotify_conn_inode(conn);
fsnotify_get_inode_ref(inode);
}
fsnotify_get_sb_connectors(conn);
/* /*
* cmpxchg() provides the barrier so that readers of *connp can see * cmpxchg() provides the barrier so that readers of *connp can see
* only initialized structure * only initialized structure
...@@ -502,7 +530,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, ...@@ -502,7 +530,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
if (cmpxchg(connp, NULL, conn)) { if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */ /* Someone else created list structure for us */
if (inode) if (inode)
iput(inode); fsnotify_put_inode_ref(inode);
kmem_cache_free(fsnotify_mark_connector_cachep, conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn);
} }
......
...@@ -27,6 +27,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ ...@@ -27,6 +27,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) #define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
/* /*
* fanotify_init() flags that require CAP_SYS_ADMIN. * fanotify_init() flags that require CAP_SYS_ADMIN.
* We do not allow unprivileged groups to request permission events. * We do not allow unprivileged groups to request permission events.
...@@ -35,6 +37,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ ...@@ -35,6 +37,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
*/ */
#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ #define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \
FAN_REPORT_TID | \ FAN_REPORT_TID | \
FAN_REPORT_PIDFD | \
FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS) FAN_UNLIMITED_MARKS)
......
...@@ -1507,8 +1507,11 @@ struct super_block { ...@@ -1507,8 +1507,11 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */ /* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count; atomic_long_t s_remove_count;
/* Pending fsnotify inode refs */ /*
atomic_long_t s_fsnotify_inode_refs; * Number of inode/mount/sb objects that are being watched, note that
* inode objects are currently double-accounted.
*/
atomic_long_t s_fsnotify_connectors;
/* Being remounted read-only */ /* Being remounted read-only */
int s_readonly_remount; int s_readonly_remount;
......
...@@ -30,6 +30,9 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, ...@@ -30,6 +30,9 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask,
struct inode *child, struct inode *child,
const struct qstr *name, u32 cookie) const struct qstr *name, u32 cookie)
{ {
if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
return;
fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie);
} }
...@@ -41,6 +44,9 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, ...@@ -41,6 +44,9 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
static inline void fsnotify_inode(struct inode *inode, __u32 mask) static inline void fsnotify_inode(struct inode *inode, __u32 mask)
{ {
if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
return;
if (S_ISDIR(inode->i_mode)) if (S_ISDIR(inode->i_mode))
mask |= FS_ISDIR; mask |= FS_ISDIR;
...@@ -53,6 +59,9 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, ...@@ -53,6 +59,9 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
return 0;
if (S_ISDIR(inode->i_mode)) { if (S_ISDIR(inode->i_mode)) {
mask |= FS_ISDIR; mask |= FS_ISDIR;
......
...@@ -78,6 +78,7 @@ struct file; ...@@ -78,6 +78,7 @@ struct file;
extern struct pid *pidfd_pid(const struct file *file); extern struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
int pidfd_create(struct pid *pid, unsigned int flags);
static inline struct pid *get_pid(struct pid *pid) static inline struct pid *get_pid(struct pid *pid)
{ {
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#define FAN_ENABLE_AUDIT 0x00000040 #define FAN_ENABLE_AUDIT 0x00000040
/* Flags to determine fanotify event format */ /* Flags to determine fanotify event format */
#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */
#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
...@@ -123,6 +124,7 @@ struct fanotify_event_metadata { ...@@ -123,6 +124,7 @@ struct fanotify_event_metadata {
#define FAN_EVENT_INFO_TYPE_FID 1 #define FAN_EVENT_INFO_TYPE_FID 1
#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_EVENT_INFO_TYPE_DFID_NAME 2
#define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_DFID 3
#define FAN_EVENT_INFO_TYPE_PIDFD 4
/* Variable length info record following event metadata */ /* Variable length info record following event metadata */
struct fanotify_event_info_header { struct fanotify_event_info_header {
...@@ -148,6 +150,15 @@ struct fanotify_event_info_fid { ...@@ -148,6 +150,15 @@ struct fanotify_event_info_fid {
unsigned char handle[0]; unsigned char handle[0];
}; };
/*
 * Info record of type FAN_EVENT_INFO_TYPE_PIDFD.
 *
 * It carries a pidfd for the pid that was responsible for generating the
 * event.  The record starts with the generic info header, which is followed
 * by a single signed 32-bit pidfd value.
 *
 * NOTE(review): FAN_NOPIDFD and FAN_EPIDFD look like sentinel values for the
 * pidfd field - confirm against the code that fills in this record.
 */
struct fanotify_event_info_pidfd {
/* Generic info record header */
struct fanotify_event_info_header hdr;
/* pidfd for the pid that generated the event */
__s32 pidfd;
};
struct fanotify_response { struct fanotify_response {
__s32 fd; __s32 fd;
__u32 response; __u32 response;
...@@ -160,6 +171,8 @@ struct fanotify_response { ...@@ -160,6 +171,8 @@ struct fanotify_response {
/* No fd set in event */ /* No fd set in event */
#define FAN_NOFD -1 #define FAN_NOFD -1
#define FAN_NOPIDFD FAN_NOFD
#define FAN_EPIDFD -2
/* Helper functions to deal with fanotify_event_metadata buffers */ /* Helper functions to deal with fanotify_event_metadata buffers */
#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
......
...@@ -550,13 +550,21 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) ...@@ -550,13 +550,21 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
* Note, that this function can only be called after the fd table has * Note, that this function can only be called after the fd table has
* been unshared to avoid leaking the pidfd to the new process. * been unshared to avoid leaking the pidfd to the new process.
* *
* This symbol should not be explicitly exported to loadable modules.
*
* Return: On success, a cloexec pidfd is returned. * Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned. * On error, a negative errno number will be returned.
*/ */
static int pidfd_create(struct pid *pid, unsigned int flags) int pidfd_create(struct pid *pid, unsigned int flags)
{ {
int fd; int fd;
if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
return -EINVAL;
if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
return -EINVAL;
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
flags | O_RDWR | O_CLOEXEC); flags | O_RDWR | O_CLOEXEC);
if (fd < 0) if (fd < 0)
...@@ -596,10 +604,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) ...@@ -596,10 +604,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
if (!p) if (!p)
return -ESRCH; return -ESRCH;
if (pid_has_task(p, PIDTYPE_TGID))
fd = pidfd_create(p, flags); fd = pidfd_create(p, flags);
else
fd = -EINVAL;
put_pid(p); put_pid(p);
return fd; return fd;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment