--- arch/alpha/kernel/process.c.~1~ Thu Nov 26 18:47:30 1998 +++ arch/alpha/kernel/process.c Fri Nov 27 10:25:01 1998 @@ -55,7 +55,6 @@ unsigned long init_user_stack[1024] = { STACK_MAGIC, }; static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/arm/kernel/init_task.c.~1~ Sun Sep 6 18:44:47 1998 +++ arch/arm/kernel/init_task.c Fri Nov 27 10:26:13 1998 @@ -6,7 +6,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/arm/kernel/sys_arm.c.~1~ Sun Sep 6 18:44:47 1998 +++ arch/arm/kernel/sys_arm.c Thu Nov 26 18:49:34 1998 @@ -77,7 +77,8 @@ goto out; if (!(a.flags & MAP_ANONYMOUS)) { error = -EBADF; - if (a.fd >= NR_OPEN || !(file = current->files->fd[a.fd])) + if (a.fd >= current->files->max_fds || + !(file = current->files->fd[a.fd])) goto out; } a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); --- arch/i386/kernel/init_task.c.~1~ Sun Sep 13 20:16:22 1998 +++ arch/i386/kernel/init_task.c Thu Nov 26 18:49:34 1998 @@ -7,7 +7,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/m68k/kernel/process.c.~1~ Thu Nov 26 18:48:06 1998 +++ arch/m68k/kernel/process.c Fri Nov 27 10:25:52 1998 @@ -40,7 +40,6 @@ */ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * 
init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/mips/kernel/init_task.c.~1~ Fri May 8 08:13:23 1998 +++ arch/mips/kernel/init_task.c Fri Nov 27 10:25:39 1998 @@ -6,7 +6,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct files * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/mips/kernel/irixioctl.c.~1~ Thu Nov 26 18:47:31 1998 +++ arch/mips/kernel/irixioctl.c Thu Nov 26 18:49:34 1998 @@ -33,7 +33,7 @@ { struct file *filp; - if(fd >= NR_OPEN || !(filp = current->files->fd[fd])) + if(fd >= current->files->max_fds || !(filp = current->files->fd[fd])) return ((struct tty_struct *) 0); if(filp->private_data) { struct tty_struct *ttyp = (struct tty_struct *) filp->private_data; --- arch/mips/kernel/sysirix.c.~1~ Thu Nov 26 18:47:46 1998 +++ arch/mips/kernel/sysirix.c Thu Nov 26 18:49:34 1998 @@ -788,7 +788,8 @@ error = verify_area(VERIFY_WRITE, buf, sizeof(struct irix_statfs)); if (error) goto out; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) { + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) { error = -EBADF; goto out; } @@ -1111,7 +1112,8 @@ lock_kernel(); if(!(flags & MAP_ANONYMOUS)) { - if(fd >= NR_OPEN || !(file = current->files->fd[fd])) { + if(fd >= current->files->max_fds || + !(file = current->files->fd[fd])) { retval = -EBADF; goto out; } @@ -1583,7 +1585,8 @@ error = verify_area(VERIFY_WRITE, buf, sizeof(struct irix_statvfs)); if (error) goto out; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) { + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) { error = -EBADF; goto out; } @@ -1727,7 +1730,8 @@ } if(!(flags & MAP_ANONYMOUS)) { - if(fd >= NR_OPEN 
|| !(file = current->files->fd[fd])) { + if(fd >= current->files->max_fds || + !(file = current->files->fd[fd])) { error = -EBADF; goto out; } @@ -1879,7 +1883,8 @@ error = verify_area(VERIFY_WRITE, buf, sizeof(struct irix_statvfs)); if (error) goto out; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) { + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) { error = -EBADF; goto out; } @@ -2040,7 +2045,8 @@ current->pid, fd, dirent, count, eob); #endif error = -EBADF; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) goto out; dentry = file->f_dentry; @@ -2151,7 +2157,8 @@ current->pid, fd, dirent, cnt); #endif error = -EBADF; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) goto out; dentry = file->f_dentry; @@ -2212,7 +2219,8 @@ current->pid, fd, dirent, cnt); #endif error = -EBADF; - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) goto out; dentry = file->f_dentry; --- arch/ppc/kernel/process.c.~1~ Thu Nov 26 18:48:16 1998 +++ arch/ppc/kernel/process.c Fri Nov 27 10:25:47 1998 @@ -65,7 +65,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; --- arch/ppc/kernel/syscalls.c.~1~ Wed Sep 30 18:14:17 1998 +++ arch/ppc/kernel/syscalls.c Thu Nov 26 18:49:34 1998 @@ -205,7 +205,8 @@ lock_kernel(); if (!(flags & MAP_ANONYMOUS)) { - if (fd >= NR_OPEN || !(file = current->files->fd[fd])) + if (fd >= current->files->max_fds || + !(file = current->files->fd[fd])) goto out; } --- arch/sparc/kernel/init_task.c.~1~ Thu Nov 26 18:47:47 1998 +++ arch/sparc/kernel/init_task.c Fri Nov 27 10:25:32 
1998 @@ -6,7 +6,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/sparc64/kernel/init_task.c.~1~ Wed Apr 15 01:44:20 1998 +++ arch/sparc64/kernel/init_task.c Fri Nov 27 10:25:56 1998 @@ -6,7 +6,6 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; -static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; struct mm_struct init_mm = INIT_MM; --- arch/sparc64/solaris/timod.c.~1~ Wed Apr 15 01:44:21 1998 +++ arch/sparc64/solaris/timod.c Thu Nov 26 18:49:34 1998 @@ -866,7 +866,7 @@ SOLD("entry"); lock_kernel(); - if(fd >= NR_OPEN) goto out; + if(fd >= current->files->max_fds) goto out; filp = current->files->fd[fd]; if(!filp) goto out; @@ -933,7 +933,7 @@ SOLD("entry"); lock_kernel(); - if(fd >= NR_OPEN) goto out; + if(fd >= current->files->max_fds) goto out; filp = current->files->fd[fd]; if(!filp) goto out; --- fs/Makefile.~1~ Mon Aug 31 21:01:35 1998 +++ fs/Makefile Mon Nov 30 16:01:01 1998 @@ -13,7 +13,7 @@ O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o inode.o attr.o bad_inode.o $(BINFMTS) + dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ --- fs/exec.c.~1~ Thu Nov 26 18:48:25 1998 +++ fs/exec.c Thu Nov 26 18:49:34 1998 @@ -482,10 +482,10 @@ unsigned long set, i; i = j * __NFDBITS; - if (i >= files->max_fds) + if (i >= files->max_fds || i >= files->max_fdset) break; - set = files->close_on_exec.fds_bits[j]; - 
files->close_on_exec.fds_bits[j] = 0; + set = files->close_on_exec->fds_bits[j]; + files->close_on_exec->fds_bits[j] = 0; j++; for ( ; set ; i++,set >>= 1) { if (set & 1) --- fs/fcntl.c.~1~ Thu Nov 26 18:48:25 1998 +++ fs/fcntl.c Thu Nov 26 18:49:34 1998 @@ -12,14 +12,15 @@ extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg); -static inline int dupfd(unsigned int fd, unsigned int arg) +static inline int dupfd(unsigned int fd, unsigned int start) { struct files_struct * files = current->files; struct file * file; + unsigned int newfd; int error; error = -EINVAL; - if (arg >= NR_OPEN) + if (start >= NR_OPEN) goto out; error = -EBADF; @@ -27,15 +28,39 @@ if (!file) goto out; +repeat: error = -EMFILE; - arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg); - if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur) + if (start < files->next_fd) + start = files->next_fd; + /* At this point, start MUST be <= max_fdset */ +#if 1 + if (start > files->max_fdset) + printk (KERN_ERR "dupfd: fd %d, max %d\n", + start, files->max_fdset); +#endif + newfd = find_next_zero_bit(files->open_fds->fds_bits, + files->max_fdset, + start); + if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur) goto out_putf; - FD_SET(arg, &files->open_fds); - FD_CLR(arg, &files->close_on_exec); - fd_install(arg, file); - error = arg; + + error = expand_files(files, newfd); + if (error < 0) + goto out_putf; + if (error) /* If we might have blocked, try again. 
*/ + goto repeat; + + FD_SET(newfd, files->open_fds); + FD_CLR(newfd, files->close_on_exec); + if (start <= files->next_fd) + files->next_fd = newfd + 1; + fd_install(newfd, file); + error = newfd; out: +#ifdef FDSET_DEBUG + if (error < 0) + printk (KERN_ERR __FUNCTION__ ": return %d\n", error); +#endif return error; out_putf: @@ -48,18 +73,30 @@ int err = -EBADF; lock_kernel(); +#ifdef FDSET_DEBUG + printk (KERN_ERR __FUNCTION__ " 0: oldfd = %d, newfd = %d\n", + oldfd, newfd); +#endif if (!fcheck(oldfd)) goto out; + if (newfd >= NR_OPEN) + goto out; /* following POSIX.1 6.2.1 */ + err = newfd; if (newfd == oldfd) goto out; - err = -EBADF; - if (newfd >= NR_OPEN) - goto out; /* following POSIX.1 6.2.1 */ + /* We must be able to do the fd setting inside dupfd() without + blocking after the sys_close(). */ + if ((err = expand_files(current->files, newfd)) < 0) + goto out; + sys_close(newfd); err = dupfd(oldfd, newfd); out: +#ifdef FDSET_DEBUG + printk (KERN_ERR __FUNCTION__ ": return %d\n", err); +#endif unlock_kernel(); return err; } @@ -71,6 +108,10 @@ lock_kernel(); ret = dupfd(fildes, 0); unlock_kernel(); +#ifdef FDSET_DEBUG + if (ret < 0) + printk (KERN_ERR __FUNCTION__ ": return %d\n", ret); +#endif return ret; } @@ -111,19 +152,20 @@ filp = fget(fd); if (!filp) goto out; + err = 0; switch (cmd) { case F_DUPFD: err = dupfd(fd, arg); break; case F_GETFD: - err = FD_ISSET(fd, &current->files->close_on_exec); + err = FD_ISSET(fd, current->files->close_on_exec); break; case F_SETFD: if (arg&1) - FD_SET(fd, &current->files->close_on_exec); + FD_SET(fd, current->files->close_on_exec); else - FD_CLR(fd, &current->files->close_on_exec); + FD_CLR(fd, current->files->close_on_exec); break; case F_GETFL: err = filp->f_flags; @@ -151,7 +193,6 @@ err = filp->f_owner.pid; break; case F_SETOWN: - err = 0; filp->f_owner.pid = arg; filp->f_owner.uid = current->uid; filp->f_owner.euid = current->euid; @@ -171,10 +212,9 @@ break; default: /* sockets need a few special fcntls.
*/ + err = -EINVAL; if (S_ISSOCK (filp->f_dentry->d_inode->i_mode)) err = sock_fcntl (filp, cmd, arg); - else - err = -EINVAL; break; } fput(filp); --- fs/file.c.~1~ Mon Nov 30 15:59:30 1998 +++ fs/file.c Mon Nov 30 18:13:37 1998 @@ -0,0 +1,224 @@ +/* + * linux/fs/file.c + * + * Copyright (C) 1998, Stephen Tweedie and Bill Hawes + * + * Manage the dynamic fd arrays in the process files_struct. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/malloc.h> +#include <linux/vmalloc.h> + +#include <asm/bitops.h> + + +/* + * Allocate an fd array, using get_free_page() if possible. + * Note: the array isn't cleared at allocation time. + */ +struct file ** alloc_fd_array(int num) +{ + struct file **new_fds; + int size = num * sizeof(struct file *); + + if (size < PAGE_SIZE) + new_fds = (struct file **) kmalloc(size, GFP_KERNEL); + else if (size == PAGE_SIZE) + new_fds = (struct file **) __get_free_page(GFP_KERNEL); + else + new_fds = (struct file **) vmalloc(size); + return new_fds; +} + +void free_fd_array(struct file **array, int num) +{ + int size = num * sizeof(struct file *); + + if (!array) { + printk (KERN_ERR __FUNCTION__ "array = 0 (num = %d)\n", num); + return; + } + + if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ + return; + else if (size < PAGE_SIZE) + kfree(array); + else if (size == PAGE_SIZE) + free_page((unsigned long) array); + else + vfree(array); +} + +/* + * Expand the fd array in the files_struct. + */ + +int expand_fd_array(struct files_struct *files, int nr) +{ + struct file **new_fds; + int error, nfds; + + + error = -EMFILE; + if (files->max_fds >= NR_OPEN || nr > NR_OPEN) + goto out; + + nfds = files->max_fds; + + /* + * Expand to the max in easy steps, and keep expanding it until + * we have enough for the requested fd array size.
+ */ + + do { +#if NR_OPEN_DEFAULT < 256 + if (nfds < 256) + nfds = 256; + else +#endif + if (nfds < (PAGE_SIZE / sizeof(struct file *))) + nfds = PAGE_SIZE / sizeof(struct file *); + else { + nfds = nfds * 2; + if (nfds > NR_OPEN) + nfds = NR_OPEN; + } + } while (nfds < nr); + + error = -ENOMEM; + new_fds = alloc_fd_array(nfds); + if (!new_fds) + goto out; + + /* Copy the existing array and install the new pointer */ + + if (nfds > files->max_fds) { + struct file **old_fds; + int i = files->max_fds; + + old_fds = files->fd; + files->fd = new_fds; + files->max_fds = nfds; + /* Don't copy/clear the array if we are creating a new + fd array for fork() */ + if (i) { + memcpy(new_fds, old_fds, i * sizeof(struct file *)); + /* clear the remainder of the array */ + memset(&new_fds[i], 0, + (nfds-i) * sizeof(struct file *)); + free_fd_array(old_fds, i); + } + } else { + /* Somebody expanded the array while we slept ... */ + free_fd_array(new_fds, nfds); + } + error = 0; +out: + return error; +} + +/* + * Allocate an fdset array, using get_free_page() if possible. + * Note: the array isn't cleared at allocation time. + */ +fd_set * alloc_fdset(int num) +{ + fd_set *new_fdset; + int size = num / 8; + + if (size < PAGE_SIZE) + new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); + else if (size == PAGE_SIZE) + new_fdset = (fd_set *) __get_free_page(GFP_KERNEL); + else + new_fdset = (fd_set *) vmalloc(size); + return new_fdset; +} + +void free_fdset(fd_set *array, int num) +{ + int size = num / 8; + + if (!array) { + printk (KERN_ERR __FUNCTION__ "array = 0 (num = %d)\n", num); + return; + } + + if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */ + return; + else if (size < PAGE_SIZE) + kfree(array); + else if (size == PAGE_SIZE) + free_page((unsigned long) array); + else + vfree(array); +} + +/* + * Expand the fdset in the files_struct. 
+ */ +int expand_fdset(struct files_struct *files, int nr) +{ + fd_set *new_openset = 0, *new_execset = 0; + int error, nfds = 0; + + error = -EMFILE; + if (files->max_fdset >= NR_OPEN || nr > NR_OPEN) + goto out; + + nfds = files->max_fdset; + /* Expand to the max in easy steps */ + do { + if (nfds < (PAGE_SIZE * 8)) + nfds = PAGE_SIZE * 8; + else { + nfds = nfds * 2; + if (nfds > NR_OPEN) + nfds = NR_OPEN; + } + } while (nfds < nr); + + error = -ENOMEM; + new_openset = alloc_fdset(nfds); + new_execset = alloc_fdset(nfds); + if (!new_openset || !new_execset) + goto out; + + error = 0; + + /* Copy the existing tables and install the new pointers */ + if (nfds > files->max_fdset) { + int i = files->max_fdset / (sizeof(unsigned long) * 8); + int count = (nfds - files->max_fdset) / 8; + + /* + * Don't copy the entire array if the current fdset is + * not yet initialised. + */ + if (i) { + memcpy (new_openset, files->open_fds, files->max_fdset/8); + memcpy (new_execset, files->close_on_exec, files->max_fdset/8); + memset (&new_openset->fds_bits[i], 0, count); + memset (&new_execset->fds_bits[i], 0, count); + } + + free_fdset (files->close_on_exec, files->max_fdset); + free_fdset (files->open_fds, files->max_fdset); + files->max_fdset = nfds; + files->open_fds = new_openset; + files->close_on_exec = new_execset; + return 0; + } + /* Somebody expanded the array while we slept ... 
*/ + +out: + if (new_openset) + free_fdset(new_openset, nfds); + if (new_execset) + free_fdset(new_execset, nfds); + return error; +} + --- fs/ioctl.c.~1~ Thu Nov 26 18:48:25 1998 +++ fs/ioctl.c Thu Nov 26 18:49:34 1998 @@ -52,11 +52,11 @@ error = 0; switch (cmd) { case FIOCLEX: - FD_SET(fd, &current->files->close_on_exec); + FD_SET(fd, current->files->close_on_exec); break; case FIONCLEX: - FD_CLR(fd, &current->files->close_on_exec); + FD_CLR(fd, current->files->close_on_exec); break; case FIONBIO: --- fs/open.c.~1~ Thu Nov 26 18:48:25 1998 +++ fs/open.c Thu Nov 26 18:49:34 1998 @@ -690,9 +690,13 @@ { struct files_struct * files = current->files; int fd, error; - + +repeat: error = -EMFILE; - fd = find_first_zero_bit(&files->open_fds, NR_OPEN); + + fd = find_next_zero_bit(files->open_fds, + current->files->max_fdset, + files->next_fd); /* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. @@ -700,10 +704,27 @@ if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur) goto out; - /* Check here for fd > files->max_fds to do dynamic expansion */ + /* Do we need to expand the fdset array? */ + if (fd >= current->files->max_fdset) { + error = expand_fdset(files, 0); + if (!error) + goto repeat; + goto out; + } + + /* + * Check whether we need to expand the fd array.
+ */ + if (fd >= files->max_fds) { + error = expand_fd_array(files, 0); + if (!error) + goto repeat; + goto out; + } - FD_SET(fd, &files->open_fds); - FD_CLR(fd, &files->close_on_exec); + FD_SET(fd, files->open_fds); + FD_CLR(fd, files->close_on_exec); + files->next_fd = fd + 1; #if 1 /* Sanity check */ if (files->fd[fd] != NULL) { @@ -714,12 +735,18 @@ error = fd; out: +#ifdef FDSET_DEBUG + if (error < 0) + printk (KERN_ERR __FUNCTION__ ": return %d\n", error); +#endif return error; } inline void put_unused_fd(unsigned int fd) { - FD_CLR(fd, &current->files->open_fds); + FD_CLR(fd, current->files->open_fds); + if (fd < current->files->next_fd) + current->files->next_fd = fd; } asmlinkage int sys_open(const char * filename, int flags, int mode) @@ -823,7 +850,7 @@ struct files_struct * files = current->files; files->fd[fd] = NULL; put_unused_fd(fd); - FD_CLR(fd, &files->close_on_exec); + FD_CLR(fd, files->close_on_exec); error = close_fp(filp, files); } unlock_kernel(); --- fs/proc/array.c.~1~ Thu Nov 26 18:48:51 1998 +++ fs/proc/array.c Thu Nov 26 18:51:50 1998 @@ -709,11 +709,13 @@ "PPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n" + "FDSize:\t%d\n" "Groups:\t", get_task_state(p), p->pid, p->p_pptr->pid, p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); + p->gid, p->egid, p->sgid, p->fsgid, + p->files ? p->files->max_fds : 0); for (g = 0; g < p->ngroups; g++) buffer += sprintf(buffer, "%d ", p->groups[g]); --- fs/select.c.~1~ Thu Nov 26 18:48:51 1998 +++ fs/select.c Thu Nov 26 18:49:35 1998 @@ -69,7 +69,7 @@ /* handle last in-complete long-word first */ set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; - open_fds = current->files->open_fds.fds_bits+n; + open_fds = current->files->open_fds->fds_bits+n; in = fds->in+n; max = 0; if (set) { @@ -328,7 +328,7 @@ lock_kernel(); /* Do a sanity check on nfds ...
*/ err = -EINVAL; - if (nfds > NR_OPEN) + if (nfds > current->files->max_fds) goto out; if (timeout < 0) --- include/asm-alpha/resource.h.~1~ Fri Aug 23 13:30:14 1996 +++ include/asm-alpha/resource.h Fri Nov 27 10:32:19 1998 @@ -28,7 +28,7 @@ {_STK_LIM, _STK_LIM}, /* RLIMIT_STACK */ \ { 0, LONG_MAX}, /* RLIMIT_CORE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_RSS */ \ - { NR_OPEN, NR_OPEN}, /* RLIMIT_NOFILE */ \ + {INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \ {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, /* RLIMIT_NPROC */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \ --- include/asm-arm/resource.h.~1~ Wed Jan 21 00:39:43 1998 +++ include/asm-arm/resource.h Fri Nov 27 10:33:29 1998 @@ -29,7 +29,7 @@ { 0, LONG_MAX }, \ { LONG_MAX, LONG_MAX }, \ { MAX_TASKS_PER_USER, MAX_TASKS_PER_USER }, \ - { NR_OPEN, NR_OPEN }, \ + { INR_OPEN, INR_OPEN }, \ { LONG_MAX, LONG_MAX }, \ { LONG_MAX, LONG_MAX }, \ } --- include/asm-i386/resource.h.~1~ Thu Nov 26 18:48:26 1998 +++ include/asm-i386/resource.h Fri Nov 27 10:30:12 1998 @@ -29,7 +29,7 @@ { 0, LONG_MAX }, \ { LONG_MAX, LONG_MAX }, \ { MAX_TASKS_PER_USER, MAX_TASKS_PER_USER }, \ - { NR_OPEN, NR_OPEN }, \ + { INR_OPEN, INR_OPEN }, \ { LONG_MAX, LONG_MAX }, \ { LONG_MAX, LONG_MAX }, \ } --- include/asm-m68k/resource.h.~1~ Wed Sep 25 08:47:42 1996 +++ include/asm-m68k/resource.h Fri Nov 27 10:32:32 1998 @@ -29,7 +29,7 @@ { 0, LONG_MAX}, \ {LONG_MAX, LONG_MAX}, \ {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ - {NR_OPEN, NR_OPEN}, \ + {INR_OPEN, INR_OPEN}, \ {LONG_MAX, LONG_MAX}, \ {LONG_MAX, LONG_MAX} \ } --- include/asm-mips/resource.h.~1~ Thu Jun 26 20:33:40 1997 +++ include/asm-mips/resource.h Fri Nov 27 10:32:02 1998 @@ -35,7 +35,7 @@ {LONG_MAX, LONG_MAX}, \ {_STK_LIM, _STK_LIM}, \ { 0, LONG_MAX}, \ - {NR_OPEN, NR_OPEN}, \ + {INR_OPEN, INR_OPEN}, \ {LONG_MAX, LONG_MAX}, \ {LONG_MAX, LONG_MAX}, \ {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ --- include/asm-ppc/resource.h.~1~ Sat Aug 16 17:51:09 1997 +++ 
include/asm-ppc/resource.h Fri Nov 27 10:32:59 1998 @@ -25,7 +25,7 @@ { 0, LONG_MAX}, /* RLIMIT_CORE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_RSS */ \ {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, /* RLIMIT_NPROC */ \ - { NR_OPEN, NR_OPEN}, /* RLIMIT_NOFILE */ \ + {INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \ } --- include/asm-sparc/resource.h.~1~ Fri Dec 13 09:37:40 1996 +++ include/asm-sparc/resource.h Fri Nov 27 10:32:47 1998 @@ -31,7 +31,7 @@ {LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX}, \ {LONG_MAX, LONG_MAX}, {_STK_LIM, _STK_LIM}, \ { 0, LONG_MAX}, {LONG_MAX, LONG_MAX}, \ - {NR_OPEN, NR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ + {INR_OPEN, INR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ {LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX} \ } --- include/asm-sparc64/resource.h.~1~ Thu Jun 26 20:33:40 1997 +++ include/asm-sparc64/resource.h Fri Nov 27 10:33:14 1998 @@ -30,7 +30,7 @@ {LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX}, \ {LONG_MAX, LONG_MAX}, {_STK_LIM, _STK_LIM}, \ { 0, LONG_MAX}, {LONG_MAX, LONG_MAX}, \ - {NR_OPEN, NR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ + {INR_OPEN, INR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \ {LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX} \ } --- include/linux/fs.h.~1~ Thu Nov 26 18:48:13 1998 +++ include/linux/fs.h Fri Nov 27 11:53:44 1998 @@ -27,17 +27,19 @@ /* - * It's silly to have NR_OPEN bigger than NR_FILE, but I'll fix - * that later. Anyway, now the file code is no longer dependent - * on bitmaps in unsigned longs, but uses the new fd_set structure.. + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. * * Some programs (notably those using select()) may have to be - * recompiled to take full advantage of the new limits.. 
+ * recompiled to take full advantage of the new limits.. */ /* Fixed constants first: */ #undef NR_OPEN -#define NR_OPEN 1024 +#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ +#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<count); } extern void mmput(struct mm_struct *); + +/* + * Routines for handling the fd arrays + */ +extern struct file ** alloc_fd_array(int); +extern int expand_fd_array(struct files_struct *, int nr); +extern void free_fd_array(struct file **, int); + +extern fd_set *alloc_fdset(int); +extern int expand_fdset(struct files_struct *, int nr); +extern void free_fdset(fd_set *, int); + +/* Expand files. Return <0 on error; 0 nothing done; 1 files expanded, + * we may have blocked. */ +static inline int expand_files(struct files_struct *files, int nr) +{ + int err, expand = 0; +#ifdef FDSET_DEBUG + printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr); +#endif + + if (nr >= files->max_fdset) { + expand = 1; + if ((err = expand_fdset(files, nr))) + goto out; + } + if (nr >= files->max_fds) { + expand = 1; + if ((err = expand_fd_array(files, nr))) + goto out; + } + err = expand; + out: +#ifdef FDSET_DEBUG + if (err) + printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err); +#endif + return err; +} extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); extern void flush_thread(void); --- kernel/exit.c.~1~ Thu Nov 26 18:48:52 1998 +++ kernel/exit.c Thu Nov 26 20:51:44 1998 @@ -159,11 +159,11 @@ j = 0; for (;;) { - unsigned long set = files->open_fds.fds_bits[j]; + unsigned long set; i = j * __NFDBITS; - j++; - if (i >= files->max_fds) + if (i >= files->max_fdset || i >= files->max_fds) break; + set = files->open_fds->fds_bits[j++]; while (set) { if (set & 1) { struct file * file = files->fd[i]; @@ -189,12 +189,14 @@ if (atomic_dec_and_test(&files->count)) { close_files(files); /* - * Free the fd array 
as appropriate ... + * Free the fd and fdset arrays if we expanded them. */ - if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE) - free_page((unsigned long) files->fd); - else - kfree(files->fd); + if (files->fd != &files->fd_array[0]) + free_fd_array(files->fd, files->max_fds); + if (files->max_fdset > __FD_SETSIZE) { + free_fdset(files->open_fds, files->max_fdset); + free_fdset(files->close_on_exec, files->max_fdset); + } kmem_cache_free(files_cachep, files); } } --- kernel/fork.c.~1~ Thu Nov 26 18:48:52 1998 +++ kernel/fork.c Mon Nov 30 16:00:15 1998 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -350,32 +351,11 @@ return 0; } -/* - * Copy a fd_set and compute the maximum fd it contains. - */ -static inline int __copy_fdset(unsigned long *d, unsigned long *src) -{ - int i; - unsigned long *p = src; - unsigned long *max = src; - - for (i = __FDSET_LONGS; i; --i) { - if ((*d++ = *p++) != 0) - max = p; - } - return (max - src)*sizeof(long)*8; -} - -static inline int copy_fdset(fd_set *dst, fd_set *src) -{ - return __copy_fdset(dst->fds_bits, src->fds_bits); -} - static int copy_files(unsigned long clone_flags, struct task_struct * tsk) { struct files_struct *oldf, *newf; struct file **old_fds, **new_fds; - int size, i, error = 0; + int nfds, size, i, error = 0; /* * A background process may not have any files ... @@ -395,25 +375,73 @@ if (!newf) goto out; - /* - * Allocate the fd array, using get_free_page() if possible. - * Eventually we want to make the array size variable ... 
- */ - size = NR_OPEN * sizeof(struct file *); - if (size == PAGE_SIZE) - new_fds = (struct file **) __get_free_page(GFP_KERNEL); - else - new_fds = (struct file **) kmalloc(size, GFP_KERNEL); - if (!new_fds) - goto out_release; + size = oldf->max_fdset; + nfds = NR_OPEN_DEFAULT; + +#ifdef FDSET_DEBUG + printk (KERN_ERR __FUNCTION__ " size = %d/%d\n", + oldf->max_fds, oldf->max_fdset); +#endif atomic_set(&newf->count, 1); - newf->max_fds = NR_OPEN; - newf->fd = new_fds; - newf->close_on_exec = oldf->close_on_exec; - i = copy_fdset(&newf->open_fds, &oldf->open_fds); + newf->next_fd = 0; + newf->max_fds = NR_OPEN_DEFAULT; + newf->max_fdset = __FD_SETSIZE; + newf->close_on_exec = &newf->close_on_exec_init; + newf->open_fds = &newf->open_fds_init; + newf->fd = &newf->fd_array[0]; + + /* Even if the old fdset gets grown here, we'll only copy "size" fds */ + if (size > __FD_SETSIZE) { + newf->max_fdset = 0; + error = expand_fdset(newf, size); + if (error) + goto out_release; + } + memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, size/8); + memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, size/8); + if (newf->max_fdset > size) { + int left = (newf->max_fdset-size)/8; + int start = size / (8 * sizeof(unsigned long)); + + memset(&newf->open_fds->fds_bits[start], 0, left); + memset(&newf->close_on_exec->fds_bits[start], 0, left); + } + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (newf->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + +#ifdef FDSET_DEBUG + printk (KERN_ERR __FUNCTION__ " first-free = %d/%d\n", i, size); +#endif + + /* Do a sanity check ... */ + if (i > oldf->max_fds) + printk(KERN_ERR + "copy_files: pid %d, open files %d exceeds max %d!\n", + current->pid, i, oldf->max_fds); + + /* + * Check whether we need to allocate a larger fd array. + * Note: we're not a clone task, so the open count won't + * change. 
+ */ + if (i > NR_OPEN_DEFAULT) { + newf->max_fds = 0; + error = expand_fd_array(newf, i); + if (error) + goto out_release; + nfds = newf->max_fds; + } + /* compute the remainder to be cleared */ + size = (nfds - i) * sizeof(struct file *); old_fds = oldf->fd; + new_fds = newf->fd; for (; i != 0; i--) { struct file *f = *old_fds++; *new_fds = f; @@ -422,14 +450,20 @@ new_fds++; } /* This is long word aligned thus could use a optimized version */ - memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds); + memset(new_fds, 0, size); tsk->files = newf; error = 0; out: +#ifdef FDSET_DEBUG + if (error) + printk (KERN_ERR "copy_files: return %d\n", error); +#endif return error; out_release: + free_fdset (newf->close_on_exec, newf->max_fdset); + free_fdset (newf->open_fds, newf->max_fdset); kmem_cache_free(files_cachep, newf); goto out; }