git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
fs: symlink write_begin allocation context fix
author Nick Piggin <npiggin@suse.de>
Sun, 4 Jan 2009 20:00:53 +0000 (12:00 -0800)
committer Greg Kroah-Hartman <gregkh@suse.de>
Sun, 18 Jan 2009 18:43:46 +0000 (10:43 -0800)
commit 54566b2c1594c2326a645a3551f9d989f7ba3c5e upstream.

With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened.  They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim.  This bug could
cause filesystem deadlocks.

The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called.  It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock.  The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.

Add a new flag for write_begin, AOP_FLAG_NOFS.  Filesystems can now act on
this flag in their write_begin function.  Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).

This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in its write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg.  ocfs2_alloc_write_ctxt, for a
random example).

[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
  untouched to the grab_cache_page_write_begin() function.  That
  just simplifies everybody, and may even allow future expansion of the
  logic.   - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
22 files changed:
fs/affs/file.c
fs/afs/write.c
fs/buffer.c
fs/cifs/file.c
fs/ecryptfs/mmap.c
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/fuse/file.c
fs/gfs2/ops_address.c
fs/hostfs/hostfs_kern.c
fs/jffs2/file.c
fs/libfs.c
fs/namei.c
fs/nfs/file.c
fs/reiserfs/inode.c
fs/smbfs/file.c
fs/ubifs/file.c
include/linux/fs.h
include/linux/pagemap.h
mm/filemap.c

index 1377b1240b6eadba12cf89c5b961d5d90d33c594..9246cb4aa018fafa3517685e7d7fa1572f2e5a16 100644 (file)
@@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
        }
 
        index = pos >> PAGE_CACHE_SHIFT;
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index d6b85dab35fcc71a16ba310bb11848c7ae7e891d..3fb36d433621dd2bb71edcc757dbe22390ff2640 100644 (file)
@@ -144,7 +144,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
        candidate->state = AFS_WBACK_PENDING;
        init_waitqueue_head(&candidate->waitq);
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                kfree(candidate);
                return -ENOMEM;
index 10179cfa11528683495886657bd643ebde590ce8..e556455d23c7c9c6ca2d6f8524f3da5e3e0a2dc2 100644 (file)
@@ -1988,7 +1988,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
        page = *pagep;
        if (page == NULL) {
                ownpage = 1;
-               page = __grab_cache_page(mapping, index);
+               page = grab_cache_page_write_begin(mapping, index, flags);
                if (!page) {
                        status = -ENOMEM;
                        goto out;
@@ -2494,7 +2494,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index f0a81e631ae60bed3ec139833a2a1a63cef45642..586d2beadcecee3b580c559569ac23c4534af85f 100644 (file)
@@ -2073,7 +2073,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
 
        cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
                goto out;
index 04d7b3fa1ac6cd7679237cd2188a385c67bf960c..46cec2b69796d01d21cc0c92f14c345091aa0b85 100644 (file)
@@ -288,7 +288,7 @@ static int ecryptfs_write_begin(struct file *file,
        loff_t prev_page_end_size;
        int rc = 0;
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index f8424ad8997195f0cdd2d2c1c53196f16651d7c8..846e8cadfbc482c7c30fa1d5cf93b60c96db93b9 100644 (file)
@@ -1160,7 +1160,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
        to = from + len;
 
 retry:
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index 3e5edc92aa0b088071d1711c4bcf620d585e0416..a48e965f3bc610c4f0b968bf5fcf9edac52e6a1e 100644 (file)
@@ -2170,8 +2170,7 @@ retry:
                 * We have a transaction open.  All is sweetness.  It also sets
                 * i_size in generic_commit_write().
                 */
-               err = __page_symlink(inode, symname, l,
-                               mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+               err = __page_symlink(inode, symname, l, 1);
                if (err) {
                        drop_nlink(inode);
                        ext3_mark_inode_dirty(handle, inode);
index be21a5ae33cb01261a8fe1ce0e2377654a940d3b..8e46b19e913fecac0a1713f7118f28ef67c5fbbb 100644 (file)
@@ -1345,7 +1345,7 @@ retry:
                goto out;
        }
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                ext4_journal_stop(handle);
                ret = -ENOMEM;
@@ -2549,7 +2549,7 @@ retry:
                goto out;
        }
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                ext4_journal_stop(handle);
                ret = -ENOMEM;
index 63adcb792988016d642a6e341bcbca2f694b68d1..92c839762218e0087734001959e897a0c53f1ed7 100644 (file)
@@ -2208,8 +2208,7 @@ retry:
                 * We have a transaction open.  All is sweetness.  It also sets
                 * i_size in generic_commit_write().
                 */
-               err = __page_symlink(inode, symname, l,
-                               mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+               err = __page_symlink(inode, symname, l, 1);
                if (err) {
                        clear_nlink(inode);
                        ext4_mark_inode_dirty(handle, inode);
index 34930a964b8258067ec957f9e5ff2a974c09cc58..4c9ee7011265a69faded2eee357fb23a37168bb1 100644 (file)
@@ -646,7 +646,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
 {
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-       *pagep = __grab_cache_page(mapping, index);
+       *pagep = grab_cache_page_write_begin(mapping, index, flags);
        if (!*pagep)
                return -ENOMEM;
        return 0;
@@ -779,7 +779,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                        break;
 
                err = -ENOMEM;
-               page = __grab_cache_page(mapping, index);
+               page = grab_cache_page_write_begin(mapping, index, 0);
                if (!page)
                        break;
 
index 27563816e1c5a0f086657b12b0fab0cbc861fbef..15f710f2d4da1cf6aa1e9dea10f017788f442d54 100644 (file)
@@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                goto out_trans_fail;
 
        error = -ENOMEM;
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        *pagep = page;
        if (unlikely(!page))
                goto out_endtrans;
index 3a31451ac1704a86ef3d3b7ae9e9a73240ff29ad..5c538e0ec14beeba40899ea9eb20b83b8446388e 100644 (file)
@@ -501,7 +501,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
 {
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-       *pagep = __grab_cache_page(mapping, index);
+       *pagep = grab_cache_page_write_begin(mapping, index, flags);
        if (!*pagep)
                return -ENOMEM;
        return 0;
index 5a98aa87c8530b169ec0453f977860550df73f38..5edc2bf2058134505369a0f7844363f7e3a2ecfd 100644 (file)
@@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
        uint32_t pageofs = index << PAGE_CACHE_SHIFT;
        int ret = 0;
 
-       pg = __grab_cache_page(mapping, index);
+       pg = grab_cache_page_write_begin(mapping, index, flags);
        if (!pg)
                return -ENOMEM;
        *pagep = pg;
index e960a8321902cad9398447f3434f31f90a0f2b3c..bdaec17fa388a8ec98bd03f7b393b193aec2627b 100644 (file)
@@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
 
index d34e0f9681c6557d83852cf04646afbf9e670d98..2a568980494631ebc136b58b562a08e833fc9c59 100644 (file)
@@ -2786,18 +2786,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
        }
 }
 
-int __page_symlink(struct inode *inode, const char *symname, int len,
-               gfp_t gfp_mask)
+/*
+ * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
+ */
+int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        void *fsdata;
        int err;
        char *kaddr;
+       unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
+       if (nofs)
+               flags |= AOP_FLAG_NOFS;
 
 retry:
        err = pagecache_write_begin(NULL, mapping, 0, len-1,
-                               AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+                               flags, &page, &fsdata);
        if (err)
                goto fail;
 
@@ -2821,7 +2826,7 @@ fail:
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
        return __page_symlink(inode, symname, len,
-                       mapping_gfp_mask(inode->i_mapping));
+                       !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
 }
 
 const struct inode_operations page_symlink_inode_operations = {
index d319b49f8f06f4046c408afe8a290a9e05b7920e..90f292b520d25eec248a9728b7d61cf20cd061f9 100644 (file)
@@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
                file->f_path.dentry->d_name.name,
                mapping->host->i_ino, len, (long long) pos);
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index 6c4c2c69449f6bc74dddf5c42a0daea09ccf6adf..8a6bfb4aca7dddb25dcd11d3f8548e7e32a2eb80 100644 (file)
@@ -2556,7 +2556,7 @@ static int reiserfs_write_begin(struct file *file,
        }
 
        index = pos >> PAGE_CACHE_SHIFT;
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;
index e4f8d51a5553651bd0235b2d4310ac9c44f32fb6..92d5e8ffb63923062680d09906f8baa7ad53d8c8 100644 (file)
@@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
                        struct page **pagep, void **fsdata)
 {
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-       *pagep = __grab_cache_page(mapping, index);
+       *pagep = grab_cache_page_write_begin(mapping, index, flags);
        if (!*pagep)
                return -ENOMEM;
        return 0;
index 2624411d9758d11023c938418f8a6a7938066ff5..b1496f5c45e5023c16f6117878a3f7b303f746ef 100644 (file)
@@ -219,7 +219,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
 }
 
 static int write_begin_slow(struct address_space *mapping,
-                           loff_t pos, unsigned len, struct page **pagep)
+                           loff_t pos, unsigned len, struct page **pagep,
+                           unsigned flags)
 {
        struct inode *inode = mapping->host;
        struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -247,7 +248,7 @@ static int write_begin_slow(struct address_space *mapping,
        if (unlikely(err))
                return err;
 
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (unlikely(!page)) {
                ubifs_release_budget(c, &req);
                return -ENOMEM;
@@ -438,7 +439,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
                return -EROFS;
 
        /* Try out the fast-path part first */
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
        if (unlikely(!page))
                return -ENOMEM;
 
@@ -483,7 +484,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
                unlock_page(page);
                page_cache_release(page);
 
-               return write_begin_slow(mapping, pos, len, pagep);
+               return write_begin_slow(mapping, pos, len, pagep, flags);
        }
 
        /*
index 4a853ef6fd35076d4aad21b488a97a513fb7bacc..efeff3dd95cf9135ccc95d24bd94f11cf01f3261 100644 (file)
@@ -414,6 +414,9 @@ enum positive_aop_returns {
 
 #define AOP_FLAG_UNINTERRUPTIBLE       0x0001 /* will not do a short write */
 #define AOP_FLAG_CONT_EXPAND           0x0002 /* called from cont_expand */
+#define AOP_FLAG_NOFS                  0x0004 /* used by filesystem to direct
+                                               * helper code (eg buffer layer)
+                                               * to clear GFP_FS from alloc */
 
 /*
  * oh the beauties of C type declarations.
@@ -2023,7 +2026,7 @@ extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
-               gfp_t gfp_mask);
+               int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
index 709742be02f0a1bac5336d5e38b172a2b7d9f2b7..01ca0856caff38a8d910df0915e4bb6cc93be536 100644 (file)
@@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
                        int tag, unsigned int nr_pages, struct page **pages);
 
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+                       pgoff_t index, unsigned flags);
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
index f3e5f8944d1763474b4c19e411af129ed03ab324..f8c69273c37f6656b7b3504e5f87881be395e12c 100644 (file)
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+                                       pgoff_t index, unsigned flags)
 {
        int status;
        struct page *page;
+       gfp_t gfp_notmask = 0;
+       if (flags & AOP_FLAG_NOFS)
+               gfp_notmask = __GFP_FS;
 repeat:
        page = find_lock_page(mapping, index);
        if (likely(page))
                return page;
 
-       page = page_cache_alloc(mapping);
+       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
        if (!page)
                return NULL;
-       status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+       status = add_to_page_cache_lru(page, mapping, index,
+                                               GFP_KERNEL & ~gfp_notmask);
        if (unlikely(status)) {
                page_cache_release(page);
                if (status == -EEXIST)
@@ -2161,7 +2166,7 @@ repeat:
        }
        return page;
 }
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
 
 static ssize_t generic_perform_write(struct file *file,
                                struct iov_iter *i, loff_t pos)