Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
diff --git a/fs/Kconfig b/fs/Kconfig

index 30145d886bc24290d2231db9fbda78837c83981e..d621f02a3f9e26ee9f98160566981c12be69e395 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig"
  
  endif # MISC_FILESYSTEMS
  
+source "fs/exofs/Kconfig.ore"
+
  menuconfig NETWORK_FILESYSTEMS
         bool "Network File Systems"
         default y
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig

index da42f32c49be962d38718fa08603b20e4b603ab5..86194b2f799dc2f6b29f352cba159e19d1b074e6 100644 (file)
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@
-# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
-# for every ORE user we do it like this. Any user should add itself here
-# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
-# selected here, and we default to "ON". So in effect it is like been
-# selected by any of the users.
-config ORE
-       tristate
-       depends on EXOFS_FS || PNFS_OBJLAYOUT
-       select ASYNC_XOR
-       default SCSI_OSD_ULD
-
  config EXOFS_FS
         tristate "exofs: OSD based file system support"
         depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore

new file mode 100644 (file)

index 0000000..1ca7fb7
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@
+# ORE - Objects Raid Engine (libore.ko)
+#
+# Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects
+# for every ORE user we do it like this. Any user should add itself here
+# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
+# selected here, and we default to "ON". So in effect it is like being
+# selected by any of the users.
+config ORE
+       tristate
+       depends on EXOFS_FS || PNFS_OBJLAYOUT
+       select ASYNC_XOR
+       default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c

index d271ad837202f819d73e4de97880336ec4ecbfcd..49cf230554a21d33785d16367d9397ab34a44d91 100644 (file)
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
  
                         /* first/last seg is split */
                         num_raid_units += layout->group_width;
-                       sgs_per_dev = div_u64(num_raid_units, data_devs);
+                       sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
                 } else {
                         /* For Writes add parity pages array. */
                         max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
                         u64 residual = ios->reading ?
                                         or->in.residual : or->out.residual;
                         u64 offset = (ios->offset + ios->length) - residual;
-                       struct ore_dev *od = ios->oc->ods[
-                                       per_dev->dev - ios->oc->first_dev];
+                       unsigned dev = per_dev->dev - ios->oc->first_dev;
+                       struct ore_dev *od = ios->oc->ods[dev];
  
-                       on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
+                       on_dev_error(ios, od, dev, osi.osd_err_pri,
                                      offset, residual);
                 }
                 if (osi.osd_err_pri >= acumulated_osd_err) {
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c

index 29c47e5c4a86888a5dfae3e98a76f83849ce9c6f..d222c77cfa1ba0669ca7580c420a1c31da35c86a 100644 (file)
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
  /* @si contains info of the to-be-inserted page. Update of @si should be
   * maintained by caller. Specifically si->dev, si->obj_offset, ...
   */
-static int _add_to_read_4_write(struct ore_io_state *ios,
-                               struct ore_striping_info *si, struct page *page)
+static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
+                      struct page *page, unsigned pg_len)
  {
         struct request_queue *q;
         struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
                 _ore_add_sg_seg(per_dev, gap, true);
         }
         q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
-       added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0);
-       if (unlikely(added_len != PAGE_SIZE)) {
+       added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
+                                   si->obj_offset % PAGE_SIZE);
+       if (unlikely(added_len != pg_len)) {
                 ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
                               per_dev->bio->bi_vcnt);
                 return -ENOMEM;
         }
  
-       per_dev->length += PAGE_SIZE;
+       per_dev->length += pg_len;
         return 0;
  }
  
+/*
+ * Read the beginning of an unaligned first page.
+ *
+ * Computes the striping info for ios->offset, takes the in-page part of
+ * the resulting object offset as the read length, rounds the object offset
+ * down to the page boundary and hands the request to _add_to_r4w(), so
+ * that only the bytes of @page that precede the IO start are read.
+ *
+ * @ios:  the io state; ios->layout and ios->offset are used here.
+ * @page: the page the read is added for.
+ *
+ * Returns whatever _add_to_r4w() returns.
+ *
+ * NOTE(review): if si.obj_offset is page aligned, pg_len is 0; the visible
+ * caller only calls this when ios->offset % PAGE_SIZE is non-zero, which
+ * presumably implies a non-zero in-page object offset as well - confirm.
+ */
+static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
+{
+       struct ore_striping_info si;
+       unsigned pg_len;
+
+       ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
+
+       pg_len = si.obj_offset % PAGE_SIZE; /* bytes in the page before the IO start */
+       si.obj_offset -= pg_len; /* round down to the start of the page */
+
+       ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
+                  _LLU(si.obj_offset), pg_len, page->index, si.dev);
+
+       return _add_to_r4w(ios, &si, page, pg_len);
+}
+
+/*
+ * Read the end of an incomplete last page.
+ *
+ * Computes the striping info for *@offset, looks the page up in
+ * ios->sp2d->_1p_stripes[p].pages[c] and hands a read of the rest of that
+ * page (from the in-page position of si.unit_off up to PAGE_SIZE) to
+ * _add_to_r4w().
+ *
+ * @ios:    the io state; ios->layout and ios->sp2d are used here.
+ * @offset: in/out. On entry the offset at which the read starts (the
+ *          visible caller passes ios->offset + ios->length). It is advanced
+ *          by the length being read before _add_to_r4w() is called, so it
+ *          is updated regardless of the return value.
+ *
+ * Returns whatever _add_to_r4w() returns. BUGs if no page is present at
+ * the computed [p][c] position.
+ *
+ * NOTE(review): the caller expects *@offset to end up page aligned; that
+ * holds provided *@offset and si.unit_off have the same in-page offset -
+ * confirm against ore_calc_stripe_info().
+ */
+static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
+{
+       struct ore_striping_info si;
+       struct page *page;
+       unsigned pg_len, p, c;
+
+       ore_calc_stripe_info(ios->layout, *offset, 0, &si);
+
+       p = si.unit_off / PAGE_SIZE; /* page index within the stripe unit */
+       c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+                      ios->layout->mirrors_p1, si.par_dev, si.dev); /* index into pages[] */
+       page = ios->sp2d->_1p_stripes[p].pages[c];
+
+       pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); /* bytes left up to the end of the page */
+       *offset += pg_len;
+
+       ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
+                  p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
+
+       BUG_ON(!page); /* a page must already be present at this position */
+
+       return _add_to_r4w(ios, &si, page, pg_len);
+}
+
  static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
  {
         struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
                         struct page **pp = &_1ps->pages[c];
                         bool uptodate;
  
-                       if (*pp)
+                       if (*pp) {
+                               if (ios->offset % PAGE_SIZE)
+                                       /* Read the remainder of the page */
+                                       _add_to_r4w_first_page(ios, *pp);
                                 /* to-be-written pages start here */
                                 goto read_last_stripe;
+                       }
  
                         *pp = ios->r4w->get_page(ios->private, offset,
                                                  &uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
                                 return -ENOMEM;
  
                         if (!uptodate)
-                               _add_to_read_4_write(ios, &read_si, *pp);
+                               _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
  
                         /* Mark read-pages to be cache_released */
                         _1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
         }
  
  read_last_stripe:
-       offset = ios->offset + (ios->length + PAGE_SIZE - 1) /
-                               PAGE_SIZE * PAGE_SIZE;
+       offset = ios->offset + ios->length;
+       if (offset % PAGE_SIZE)
+               _add_to_r4w_last_page(ios, &offset);
+               /* offset will be aligned to next page */
+
         last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
                                  * bytes_in_stripe;
         if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
                         /* Mark read-pages to be cache_released */
                         _1ps->page_is_read[c] = true;
                         if (!uptodate)
-                               _add_to_read_4_write(ios, &read_si, page);
+                               _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
                 }
  
                 offset += PAGE_SIZE;
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
                             unsigned cur_len)
  {
         if (ios->reading) {
-               BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev);
+               if (per_dev->cur_sg >= ios->sgs_per_dev) {
+                       ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
+                               per_dev->cur_sg, ios->sgs_per_dev);
+                       return -ENOMEM;
+               }
                 _ore_add_sg_seg(per_dev, cur_len, true);
         } else {
                 struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
                         return -ENOMEM;
                 }
  
-               BUG_ON(ios->offset % PAGE_SIZE);
-
                 /* Round io down to last full strip */
                 first_stripe = div_u64(ios->offset, stripe_size);
                 last_stripe = div_u64(ios->offset + ios->length, stripe_size);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
fs/Kconfig		patch \| blob \| history
fs/exofs/Kconfig		patch \| blob \| history
fs/exofs/Kconfig.ore	[new file with mode: 0644]	patch \| blob
fs/exofs/ore.c		patch \| blob \| history
fs/exofs/ore_raid.c		patch \| blob \| history