diff -urN FS-base/fs/dquot.c FS-pinned/fs/dquot.c --- FS-base/fs/dquot.c 2005-10-30 23:05:47.000000000 -0500 +++ FS-pinned/fs/dquot.c 2005-11-03 06:59:12.000000000 -0500 @@ -1320,13 +1320,11 @@ int cnt; struct quota_info *dqopt = sb_dqopt(sb); struct inode *toputinode[MAXQUOTAS]; - struct vfsmount *toputmnt[MAXQUOTAS]; /* We need to serialize quota_off() for device */ down(&dqopt->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; - toputmnt[cnt] = NULL; if (type != -1 && cnt != type) continue; if (!sb_has_quota_enabled(sb, cnt)) @@ -1347,9 +1345,7 @@ put_quota_format(dqopt->info[cnt].dqi_format); toputinode[cnt] = dqopt->files[cnt]; - toputmnt[cnt] = dqopt->mnt[cnt]; dqopt->files[cnt] = NULL; - dqopt->mnt[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; @@ -1357,10 +1353,7 @@ } up(&dqopt->dqonoff_sem); /* Sync the superblock so that buffers with quota data are written to - * disk (and so userspace sees correct data afterwards). - * The reference to vfsmnt we are still holding protects us from - * umount (we don't have it only when quotas are turned on/off for - * journal replay but in that case we are guarded by the fs anyway). */ + * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); @@ -1384,10 +1377,6 @@ iput(toputinode[cnt]); } up(&dqopt->dqonoff_sem); - /* We don't hold the reference when we turned on quotas - * just for the journal replay... */ - if (toputmnt[cnt]) - mntput(toputmnt[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev, 0); @@ -1502,11 +1491,8 @@ /* Quota file not on the same filesystem? */ if (nd.mnt->mnt_sb != sb) error = -EXDEV; - else { + else error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id); - if (!error) - sb_dqopt(sb)->mnt[type] = mntget(nd.mnt); - } out_path: path_release(&nd); return error; diff -urN FS-base/fs/namespace.c FS-pinned/fs/namespace.c --- FS-base/fs/namespace.c 2005-10-21 19:06:42.000000000 -0400 +++ FS-pinned/fs/namespace.c 2005-11-03 06:59:12.000000000 -0500 @@ -172,7 +172,7 @@ return mnt; } -void __mntput(struct vfsmount *mnt) +static inline void __mntput(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; dput(mnt->mnt_root); @@ -180,7 +180,46 @@ deactivate_super(sb); } -EXPORT_SYMBOL(__mntput); +void mntput_no_expire(struct vfsmount *mnt) +{ +repeat: + if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { + if (likely(!mnt->mnt_pinned)) { + spin_unlock(&vfsmount_lock); + __mntput(mnt); + return; + } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + spin_unlock(&vfsmount_lock); + acct_auto_close_mnt(mnt); + security_sb_umount_close(mnt); + goto repeat; + } +} + +EXPORT_SYMBOL(mntput_no_expire); + +void mnt_pin(struct vfsmount *mnt) +{ + spin_lock(&vfsmount_lock); + mnt->mnt_pinned++; + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL(mnt_pin); + +void mnt_unpin(struct vfsmount *mnt) +{ + spin_lock(&vfsmount_lock); + if (mnt->mnt_pinned) { + atomic_inc(&mnt->mnt_count); + mnt->mnt_pinned--; + } + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL(mnt_unpin); /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) @@ -435,16 +474,6 @@ down_write(¤t->namespace->sem); spin_lock(&vfsmount_lock); - if (atomic_read(&sb->s_active) == 1) { - /* last instance - try to be smart */ - spin_unlock(&vfsmount_lock); - lock_kernel(); - DQUOT_OFF(sb); - acct_auto_close(sb); - unlock_kernel(); - security_sb_umount_close(mnt); - spin_lock(&vfsmount_lock); - } retval = -EBUSY; if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) @@ -850,17 +879,6 @@ detach_mnt(mnt, &old_nd); spin_unlock(&vfsmount_lock); path_release(&old_nd); - - /* - * Now lay it to rest if this was the last ref on the superblock - */ - if (atomic_read(&mnt->mnt_sb->s_active) == 1) { - /* last instance - try to be smart */ - lock_kernel(); - DQUOT_OFF(mnt->mnt_sb); - acct_auto_close(mnt->mnt_sb); - unlock_kernel(); - } mntput(mnt); } else { /* diff -urN FS-base/fs/super.c FS-pinned/fs/super.c --- FS-base/fs/super.c 2005-10-30 23:05:47.000000000 -0500 +++ FS-pinned/fs/super.c 2005-11-03 06:59:12.000000000 -0500 @@ -171,6 +171,7 @@ if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); + DQUOT_OFF(s); down_write(&s->s_umount); fs->kill_sb(s); put_filesystem(fs); diff -urN FS-base/include/linux/acct.h FS-pinned/include/linux/acct.h --- FS-base/include/linux/acct.h 2005-10-21 19:06:45.000000000 -0400 +++ FS-pinned/include/linux/acct.h 2005-11-03 06:59:12.000000000 -0500 @@ -117,12 +117,15 @@ #include #ifdef CONFIG_BSD_PROCESS_ACCT +struct vfsmount; struct super_block; +extern void acct_auto_close_mnt(struct vfsmount *m); extern void acct_auto_close(struct super_block *sb); extern void acct_process(long exitcode); extern void acct_update_integrals(struct task_struct *tsk); extern void acct_clear_integrals(struct task_struct *tsk); #else +#define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_process(x) do { } while (0) #define acct_update_integrals(x) do { } while (0) diff -urN FS-base/include/linux/mount.h FS-pinned/include/linux/mount.h --- FS-base/include/linux/mount.h 2005-10-21 19:06:46.000000000 -0400 +++ FS-pinned/include/linux/mount.h 2005-11-03 06:59:12.000000000 -0500 @@ -37,6 +37,7 @@ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ + int mnt_pinned; }; static inline struct vfsmount *mntget(struct vfsmount *mnt) @@ -46,15 +47,9 @@ return mnt; } -extern void __mntput(struct vfsmount *mnt); - -static inline void mntput_no_expire(struct vfsmount *mnt) -{ - if (mnt) { - if (atomic_dec_and_test(&mnt->mnt_count)) - __mntput(mnt); - } -} +extern void mntput_no_expire(struct vfsmount *mnt); +extern void mnt_pin(struct vfsmount *mnt); +extern void mnt_unpin(struct vfsmount *mnt); static inline void mntput(struct vfsmount *mnt) { diff -urN FS-base/include/linux/quota.h FS-pinned/include/linux/quota.h --- FS-base/include/linux/quota.h 2005-10-21 19:06:46.000000000 -0400 +++ FS-pinned/include/linux/quota.h 2005-11-03 06:59:12.000000000 -0500 @@ -289,7 +289,6 @@ struct semaphore dqonoff_sem; /* Serialize quotaon & quotaoff */ struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ - struct vfsmount *mnt[MAXQUOTAS]; /* mountpoint entries of filesystems with quota files */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; diff -urN FS-base/kernel/acct.c FS-pinned/kernel/acct.c --- FS-base/kernel/acct.c 2005-10-30 01:34:20.000000000 -0400 +++ FS-pinned/kernel/acct.c 2005-11-03 06:59:12.000000000 -0500 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include /* sector_div */ @@ -192,6 +193,7 @@ add_timer(&acct_globals.timer); } if (old_acct) { + mnt_unpin(old_acct->f_vfsmnt); spin_unlock(&acct_globals.lock); do_acct_process(0, old_acct); filp_close(old_acct, NULL); @@ -199,6 +201,42 @@ } } +static int acct_on(char *name) +{ + struct file *file; + int error; + + /* Difference from BSD - they don't do O_APPEND */ + file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + filp_close(file, NULL); + return -EACCES; + } + + if (!file->f_op->write) { + filp_close(file, NULL); + return -EIO; + } + + error = security_acct(file); + if (error) { + filp_close(file, NULL); + return error; + } + + spin_lock(&acct_globals.lock); + mnt_pin(file->f_vfsmnt); + acct_file_reopen(file); + spin_unlock(&acct_globals.lock); + + mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ + + return 0; +} + /** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting @@ -212,47 +250,41 @@ */ asmlinkage long sys_acct(const char __user *name) { - struct file *file = NULL; - char *tmp; int error; if (!capable(CAP_SYS_PACCT)) return -EPERM; if (name) { - tmp = getname(name); - if (IS_ERR(tmp)) { + char *tmp = getname(name); + if (IS_ERR(tmp)) return (PTR_ERR(tmp)); - } - /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + error = acct_on(tmp); putname(tmp); - if (IS_ERR(file)) { - return (PTR_ERR(file)); - } - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - filp_close(file, NULL); - return (-EACCES); - } - - if (!file->f_op->write) { - filp_close(file, NULL); - return (-EIO); + } else { + error = security_acct(NULL); + if (!error) { + spin_lock(&acct_globals.lock); + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); } } + return error; +} - error = security_acct(file); - if (error) { - if (file) - filp_close(file, NULL); - return error; - } - +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @m: vfsmount being shut down + * + * If the accounting is turned on for a file in the subtree pointed to + * to by m, turn accounting off. Done when m is about to die. + */ +void acct_auto_close_mnt(struct vfsmount *m) +{ spin_lock(&acct_globals.lock); - acct_file_reopen(file); + if (acct_globals.file && acct_globals.file->f_vfsmnt == m) + acct_file_reopen(NULL); spin_unlock(&acct_globals.lock); - - return (0); } /** @@ -266,8 +298,8 @@ { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_dentry->d_inode->i_sb == sb) { - acct_file_reopen((struct file *)NULL); + acct_globals.file->f_vfsmnt->mnt_sb == sb) { + acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); }