git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Jan 2012 20:51:01 +0000 (12:51 -0800)
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  ore: Must support none-PAGE-aligned IO
  ore: fix BUG_ON, too few sgs when reading
  ore: Fix crash in case of an IO error.
  ore: FIX breakage when MISC_FILESYSTEMS is not set

fs/Kconfig
fs/exofs/Kconfig
fs/exofs/Kconfig.ore [new file with mode: 0644]
fs/exofs/ore.c
fs/exofs/ore_raid.c

index 30145d886bc24290d2231db9fbda78837c83981e..d621f02a3f9e26ee9f98160566981c12be69e395 100644 (file)
@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
+source "fs/exofs/Kconfig.ore"
+
 menuconfig NETWORK_FILESYSTEMS
        bool "Network File Systems"
        default y
index da42f32c49be962d38718fa08603b20e4b603ab5..86194b2f799dc2f6b29f352cba159e19d1b074e6 100644 (file)
@@ -1,14 +1,3 @@
-# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
-# for every ORE user we do it like this. Any user should add itself here
-# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
-# selected here, and we default to "ON". So in effect it is like been
-# selected by any of the users.
-config ORE
-       tristate
-       depends on EXOFS_FS || PNFS_OBJLAYOUT
-       select ASYNC_XOR
-       default SCSI_OSD_ULD
-
 config EXOFS_FS
        tristate "exofs: OSD based file system support"
        depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644 (file)
index 0000000..1ca7fb7
--- /dev/null
@@ -0,0 +1,12 @@
+# ORE - Objects Raid Engine (libore.ko)
+#
+# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
+# for every ORE user we do it like this. Any user should add itself here
+# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
+# selected here, and we default to "ON". So in effect it is like been
+# selected by any of the users.
+config ORE
+       tristate
+       depends on EXOFS_FS || PNFS_OBJLAYOUT
+       select ASYNC_XOR
+       default SCSI_OSD_ULD
index d271ad837202f819d73e4de97880336ec4ecbfcd..49cf230554a21d33785d16367d9397ab34a44d91 100644 (file)
@@ -266,7 +266,7 @@ int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
 
                        /* first/last seg is split */
                        num_raid_units += layout->group_width;
-                       sgs_per_dev = div_u64(num_raid_units, data_devs);
+                       sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
                } else {
                        /* For Writes add parity pages array. */
                        max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
                        u64 residual = ios->reading ?
                                        or->in.residual : or->out.residual;
                        u64 offset = (ios->offset + ios->length) - residual;
-                       struct ore_dev *od = ios->oc->ods[
-                                       per_dev->dev - ios->oc->first_dev];
+                       unsigned dev = per_dev->dev - ios->oc->first_dev;
+                       struct ore_dev *od = ios->oc->ods[dev];
 
-                       on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
+                       on_dev_error(ios, od, dev, osi.osd_err_pri,
                                     offset, residual);
                }
                if (osi.osd_err_pri >= acumulated_osd_err) {
index 29c47e5c4a86888a5dfae3e98a76f83849ce9c6f..d222c77cfa1ba0669ca7580c420a1c31da35c86a 100644 (file)
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
 /* @si contains info of the to-be-inserted page. Update of @si should be
  * maintained by caller. Specificaly si->dev, si->obj_offset, ...
  */
-static int _add_to_read_4_write(struct ore_io_state *ios,
-                               struct ore_striping_info *si, struct page *page)
+static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
+                      struct page *page, unsigned pg_len)
 {
        struct request_queue *q;
        struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
                _ore_add_sg_seg(per_dev, gap, true);
        }
        q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
-       added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0);
-       if (unlikely(added_len != PAGE_SIZE)) {
+       added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
+                                   si->obj_offset % PAGE_SIZE);
+       if (unlikely(added_len != pg_len)) {
                ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
                              per_dev->bio->bi_vcnt);
                return -ENOMEM;
        }
 
-       per_dev->length += PAGE_SIZE;
+       per_dev->length += pg_len;
        return 0;
 }
 
+/* read the beginning of an unaligned first page */
+static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
+{
+       struct ore_striping_info si;
+       unsigned pg_len;
+
+       ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
+
+       pg_len = si.obj_offset % PAGE_SIZE;
+       si.obj_offset -= pg_len;
+
+       ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
+                  _LLU(si.obj_offset), pg_len, page->index, si.dev);
+
+       return _add_to_r4w(ios, &si, page, pg_len);
+}
+
+/* read the end of an incomplete last page */
+static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
+{
+       struct ore_striping_info si;
+       struct page *page;
+       unsigned pg_len, p, c;
+
+       ore_calc_stripe_info(ios->layout, *offset, 0, &si);
+
+       p = si.unit_off / PAGE_SIZE;
+       c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+                      ios->layout->mirrors_p1, si.par_dev, si.dev);
+       page = ios->sp2d->_1p_stripes[p].pages[c];
+
+       pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
+       *offset += pg_len;
+
+       ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
+                  p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
+
+       BUG_ON(!page);
+
+       return _add_to_r4w(ios, &si, page, pg_len);
+}
+
 static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
 {
        struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
                        struct page **pp = &_1ps->pages[c];
                        bool uptodate;
 
-                       if (*pp)
+                       if (*pp) {
+                               if (ios->offset % PAGE_SIZE)
+                                       /* Read the remainder of the page */
+                                       _add_to_r4w_first_page(ios, *pp);
                                /* to-be-written pages start here */
                                goto read_last_stripe;
+                       }
 
                        *pp = ios->r4w->get_page(ios->private, offset,
                                                 &uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
                                return -ENOMEM;
 
                        if (!uptodate)
-                               _add_to_read_4_write(ios, &read_si, *pp);
+                               _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
 
                        /* Mark read-pages to be cache_released */
                        _1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
        }
 
 read_last_stripe:
-       offset = ios->offset + (ios->length + PAGE_SIZE - 1) /
-                               PAGE_SIZE * PAGE_SIZE;
+       offset = ios->offset + ios->length;
+       if (offset % PAGE_SIZE)
+               _add_to_r4w_last_page(ios, &offset);
+               /* offset will be aligned to next page */
+
        last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
                                 * bytes_in_stripe;
        if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
                        /* Mark read-pages to be cache_released */
                        _1ps->page_is_read[c] = true;
                        if (!uptodate)
-                               _add_to_read_4_write(ios, &read_si, page);
+                               _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
                }
 
                offset += PAGE_SIZE;
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
                            unsigned cur_len)
 {
        if (ios->reading) {
-               BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev);
+               if (per_dev->cur_sg >= ios->sgs_per_dev) {
+                       ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
+                               per_dev->cur_sg, ios->sgs_per_dev);
+                       return -ENOMEM;
+               }
                _ore_add_sg_seg(per_dev, cur_len, true);
        } else {
                struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
                        return -ENOMEM;
                }
 
-               BUG_ON(ios->offset % PAGE_SIZE);
-
                /* Round io down to last full strip */
                first_stripe = div_u64(ios->offset, stripe_size);
                last_stripe = div_u64(ios->offset + ios->length, stripe_size);