ceph: when seeing write errors on an inode, switch to sync writes

author Jeff Layton <jlayton@redhat.com>

Tue, 4 Apr 2017 12:39:46 +0000 (08:39 -0400)

committer Ilya Dryomov <idryomov@gmail.com>

Thu, 4 May 2017 07:19:22 +0000 (09:19 +0200)
author Jeff Layton <jlayton@redhat.com>
Tue, 4 Apr 2017 12:39:46 +0000 (08:39 -0400)
committer Ilya Dryomov <idryomov@gmail.com>
Thu, 4 May 2017 07:19:22 +0000 (09:19 +0200)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c

index 6cdf94459ac4b8fbbc77535c59b27f28f7d626e9..e253102b43cd37b9dcad4b1c1c9724ac12e29f1a 100644 (file)
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req)
         bool remove_page;
  
         dout("writepages_finish %p rc %d\n", inode, rc);
-       if (rc < 0)
+       if (rc < 0) {
                 mapping_set_error(mapping, rc);
+               ceph_set_error_write(ci);
+       } else {
+               ceph_clear_error_write(ci);
+       }
  
         /*
          * We lost the cache cap, need to truncate the page before
diff --git a/fs/ceph/file.c b/fs/ceph/file.c

index 134c978141d006503a10b306780012ac40cb3f68..39866d6a34b6515d494696ce7e9457370ec9fdda 100644 (file)
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1089,19 +1089,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
  
  out:
                 ceph_osdc_put_request(req);
-               if (ret == 0) {
-                       pos += len;
-                       written += len;
-
-                       if (pos > i_size_read(inode)) {
-                               check_caps = ceph_inode_set_size(inode, pos);
-                               if (check_caps)
-                                       ceph_check_caps(ceph_inode(inode),
-                                                       CHECK_CAPS_AUTHONLY,
-                                                       NULL);
-                       }
-               } else
+               if (ret != 0) {
+                       ceph_set_error_write(ci);
                         break;
+               }
+
+               ceph_clear_error_write(ci);
+               pos += len;
+               written += len;
+               if (pos > i_size_read(inode)) {
+                       check_caps = ceph_inode_set_size(inode, pos);
+                       if (check_caps)
+                               ceph_check_caps(ceph_inode(inode),
+                                               CHECK_CAPS_AUTHONLY,
+                                               NULL);
+               }
+
         }
  
         if (ret != -EOLDSNAPC && written > 0) {
@@ -1307,6 +1310,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
         }
  
  retry_snap:
+       /* FIXME: not complete since it doesn't account for being at quota */
         if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
                 err = -ENOSPC;
                 goto out;
@@ -1328,7 +1332,8 @@ retry_snap:
              inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
  
         if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-           (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+           (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
+           (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
                 struct ceph_snap_context *snapc;
                 struct iov_iter data;
                 inode_unlock(inode);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h

index c68e6a045fb9e780dfb1cf044b316313d1bf75b6..7334ee86b9e81c4d6dbf57fc9e458f6e94020cb9 100644 (file)
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -474,6 +474,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
  #define CEPH_I_CAP_DROPPED     (1 << 8)  /* caps were forcibly dropped */
  #define CEPH_I_KICK_FLUSH      (1 << 9)  /* kick flushing caps */
  #define CEPH_I_FLUSH_SNAPS     (1 << 10) /* need flush snapss */
+#define CEPH_I_ERROR_WRITE     (1 << 11) /* have seen write errors */
+
+/*
+ * We set the ERROR_WRITE bit when we start seeing write errors on an inode
+ * and then clear it when they start succeeding. Note that we do a lockless
+ * check first, and only take the lock if it looks like it needs to be changed.
+ * The write submission code just takes this as a hint, so we're not too
+ * worried if a few slip through in either direction.
+ */
+static inline void ceph_set_error_write(struct ceph_inode_info *ci)
+{ /* Set CEPH_I_ERROR_WRITE in ci->i_ceph_flags unless it is already set. */
+       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) { /* lockless peek: skip the lock when the bit is already set */
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags |= CEPH_I_ERROR_WRITE; /* the read-modify-write of the flags word happens under i_ceph_lock */
+               spin_unlock(&ci->i_ceph_lock);
+       }
+}
+
+static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
+{ /* Clear CEPH_I_ERROR_WRITE in ci->i_ceph_flags if it is currently set. */
+       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) { /* lockless peek: skip the lock when the bit is already clear */
+               spin_lock(&ci->i_ceph_lock);
+               ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE; /* the read-modify-write of the flags word happens under i_ceph_lock */
+               spin_unlock(&ci->i_ceph_lock);
+       }
+}
  
  static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
                                            long long release_count,
author	Jeff Layton <jlayton@redhat.com>
	Tue, 4 Apr 2017 12:39:46 +0000 (08:39 -0400)
committer	Ilya Dryomov <idryomov@gmail.com>
	Thu, 4 May 2017 07:19:22 +0000 (09:19 +0200)
fs/ceph/addr.c		patch \| blob \| history
fs/ceph/file.c		patch \| blob \| history
fs/ceph/super.h		patch \| blob \| history