Commit 9cfe015a authored by Eric Dumazet, committed by Linus Torvalds

get rid of NR_OPEN and introduce a sysctl_nr_open

NR_OPEN (historically set to 1024*1024) actually forbids processes from
opening more than 1024*1024 handles.

Unfortunately, some production servers hit the not so 'ridiculously high
value' of 1024*1024 file descriptors per process.

Changing NR_OPEN is not considered safe because of the potential for
vmalloc space exhaustion.

This patch introduces a new sysctl (/proc/sys/fs/nr_open) which defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.

[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 774ed22c
...@@ -1029,6 +1029,14 @@ nr_inodes ...@@ -1029,6 +1029,14 @@ nr_inodes
Denotes the number of inodes the system has allocated. This number will Denotes the number of inodes the system has allocated. This number will
grow and shrink dynamically. grow and shrink dynamically.
nr_open
-------
Denotes the maximum number of file-handles a process can
allocate. Default value is 1024*1024 (1048576), which should be
enough for most machines. The actual limit depends on the
RLIMIT_NOFILE resource limit.
nr_free_inodes nr_free_inodes
-------------- --------------
......
...@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: ...@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
- inode-max - inode-max
- inode-nr - inode-nr
- inode-state - inode-state
- nr_open
- overflowuid - overflowuid
- overflowgid - overflowgid
- suid_dumpable - suid_dumpable
...@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. ...@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
============================================================== ==============================================================
nr_open:
This denotes the maximum number of file-handles a process can
allocate. Default value is 1024*1024 (1048576), which should be
enough for most machines. The actual limit depends on the
RLIMIT_NOFILE resource limit.
==============================================================
inode-max, inode-nr & inode-state: inode-max, inode-nr & inode-state:
As with file handles, the kernel allocates the inode structures As with file handles, the kernel allocates the inode structures
......
...@@ -430,7 +430,7 @@ sys_getpagesize(void) ...@@ -430,7 +430,7 @@ sys_getpagesize(void)
asmlinkage unsigned long asmlinkage unsigned long
sys_getdtablesize(void) sys_getdtablesize(void)
{ {
return NR_OPEN; return sysctl_nr_open;
} }
/* /*
......
...@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs) ...@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
retval = NGROUPS_MAX; retval = NGROUPS_MAX;
goto out; goto out;
case 5: case 5:
retval = NR_OPEN; retval = sysctl_nr_open;
goto out; goto out;
case 6: case 6:
retval = 1; retval = 1;
......
...@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid); ...@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
EXPORT_SYMBOL(sys_geteuid); EXPORT_SYMBOL(sys_geteuid);
EXPORT_SYMBOL(sys_getuid); EXPORT_SYMBOL(sys_getuid);
EXPORT_SYMBOL(sys_getegid); EXPORT_SYMBOL(sys_getegid);
EXPORT_SYMBOL(sysctl_nr_open);
EXPORT_SYMBOL(sys_getgid); EXPORT_SYMBOL(sys_getgid);
EXPORT_SYMBOL(svr4_getcontext); EXPORT_SYMBOL(svr4_getcontext);
EXPORT_SYMBOL(svr4_setcontext); EXPORT_SYMBOL(svr4_setcontext);
......
...@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val) ...@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
case 3: /* UL_GMEMLIM */ case 3: /* UL_GMEMLIM */
return current->signal->rlim[RLIMIT_DATA].rlim_cur; return current->signal->rlim[RLIMIT_DATA].rlim_cur;
case 4: /* UL_GDESLIM */ case 4: /* UL_GDESLIM */
return NR_OPEN; return sysctl_nr_open;
} }
return -EINVAL; return -EINVAL;
} }
......
...@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) ...@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry"); SOLD("entry");
lock_kernel(); lock_kernel();
if(fd >= NR_OPEN) goto out; if (fd >= sysctl_nr_open)
goto out;
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
filp = fdt->fd[fd]; filp = fdt->fd[fd];
...@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) ...@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry"); SOLD("entry");
lock_kernel(); lock_kernel();
if(fd >= NR_OPEN) goto out; if (fd >= sysctl_nr_open)
goto out;
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
filp = fdt->fd[fd]; filp = fdt->fd[fd];
......
...@@ -24,6 +24,8 @@ struct fdtable_defer { ...@@ -24,6 +24,8 @@ struct fdtable_defer {
struct fdtable *next; struct fdtable *next;
}; };
int sysctl_nr_open __read_mostly = 1024*1024;
/* /*
* We use this list to defer free fdtables that have vmalloced * We use this list to defer free fdtables that have vmalloced
* sets/arrays. By keeping a per-cpu list, we avoid having to embed * sets/arrays. By keeping a per-cpu list, we avoid having to embed
...@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr) ...@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
nr /= (1024 / sizeof(struct file *)); nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1); nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *)); nr *= (1024 / sizeof(struct file *));
if (nr > NR_OPEN) if (nr > sysctl_nr_open)
nr = NR_OPEN; nr = sysctl_nr_open;
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt) if (!fdt)
...@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr) ...@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
if (nr < fdt->max_fds) if (nr < fdt->max_fds)
return 0; return 0;
/* Can we expand? */ /* Can we expand? */
if (nr >= NR_OPEN) if (nr >= sysctl_nr_open)
return -EMFILE; return -EMFILE;
/* All good, so we try */ /* All good, so we try */
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
/* Fixed constants first: */ /* Fixed constants first: */
#undef NR_OPEN #undef NR_OPEN
#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ extern int sysctl_nr_open;
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE_BITS 10
......
...@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) ...@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
if ((new_rlim.rlim_max > old_rlim->rlim_max) && if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE)) !capable(CAP_SYS_RESOURCE))
return -EPERM; return -EPERM;
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
return -EPERM; return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim); retval = security_task_setrlimit(resource, &new_rlim);
......
...@@ -1202,6 +1202,14 @@ static struct ctl_table fs_table[] = { ...@@ -1202,6 +1202,14 @@ static struct ctl_table fs_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nr_open",
.data = &sysctl_nr_open,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ {
.ctl_name = FS_DENTRY, .ctl_name = FS_DENTRY,
.procname = "dentry-state", .procname = "dentry-state",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment