seqlock: Better document raw_write_seqcount_latch()

author Peter Zijlstra <peterz@infradead.org>

Wed, 27 May 2015 01:39:36 +0000 (11:09 +0930)

committer Rusty Russell <rusty@rustcorp.com.au>

Thu, 28 May 2015 02:02:04 +0000 (11:32 +0930)
author Peter Zijlstra <peterz@infradead.org>
Wed, 27 May 2015 01:39:36 +0000 (11:09 +0930)
committer Rusty Russell <rusty@rustcorp.com.au>
Thu, 28 May 2015 02:02:04 +0000 (11:32 +0930)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h

index 5f68d0a391cee8506f8e0d94cda72d8bd357b10f..1c0cf3102fdc327a4474d51f6da13d9ee80b1b20 100644 (file)
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -233,9 +233,83 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
         s->sequence++;
  }
  
-/*
+/**
   * raw_write_seqcount_latch - redirect readers to even/odd copy
   * @s: pointer to seqcount_t
+ *
+ * The latch technique is a multiversion concurrency control method that allows
+ * queries during non-atomic modifications. If you can guarantee queries never
+ * interrupt the modification -- e.g. the concurrency is strictly between CPUs
+ * -- you most likely do not need this.
+ *
+ * Where the traditional RCU/lockless data structures rely on atomic
+ * modifications to ensure queries observe either the old or the new state, the
+ * latch allows the same for non-atomic updates. The trade-off is doubling the
+ * cost of storage; we have to maintain two copies of the entire data
+ * structure.
+ *
+ * Very simply put: we first modify one copy and then the other. This ensures
+ * there is always one copy in a stable state, ready to give us an answer.
+ *
+ * The basic form is a data structure like:
+ *
+ * struct latch_struct {
+ *     seqcount_t              seq;
+ *     struct data_struct      data[2];
+ * };
+ *
+ * Where a modification, which is assumed to be externally serialized, does the
+ * following:
+ *
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ *     smp_wmb();      <- Ensure that the last data[1] update is visible
+ *     latch->seq++;
+ *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *
+ *     modify(latch->data[0], ...);
+ *
+ *     smp_wmb();      <- Ensure that the data[0] update is visible
+ *     latch->seq++;
+ *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *
+ *     modify(latch->data[1], ...);
+ * }
+ *
+ * The query will have a form like:
+ *
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ *     struct entry *entry;
+ *     unsigned seq, idx;
+ *
+ *     do {
+ *             seq = latch->seq;
+ *             smp_rmb();
+ *
+ *             idx = seq & 0x01;
+ *             entry = data_query(latch->data[idx], ...);
+ *
+ *             smp_rmb();
+ *     } while (seq != latch->seq);
+ *
+ *     return entry;
+ * }
+ *
+ * So during the modification, queries are first redirected to data[1]. Then we
+ * modify data[0]. When that is complete, we redirect queries back to data[0]
+ * and we can modify data[1].
+ *
+ * NOTE: The non-requirement for atomic modifications does _NOT_ include
+ *       the publishing of new entries in the case where data is a dynamic
+ *       data structure.
+ *
+ *       An iteration might start in data[0] and get suspended long enough
+ *       to miss an entire modification sequence; once it resumes it might
+ *       observe the new entry.
+ *
+ * NOTE: When data is a dynamic data structure, one should use regular RCU
+ *       patterns to manage the lifetimes of the objects within.
   */
  static inline void raw_write_seqcount_latch(seqcount_t *s)
  {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c

index 946acb72179facb1c173e54592b3c1c3637f8abd..cbfedddbf0cb07be2c474e3881777ca033580415 100644 (file)
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -330,32 +330,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
   * We want to use this from any context including NMI and tracing /
   * instrumenting the timekeeping code itself.
   *
- * So we handle this differently than the other timekeeping accessor
- * functions which retry when the sequence count has changed. The
- * update side does:
- *
- * smp_wmb();  <- Ensure that the last base[1] update is visible
- * tkf->seq++;
- * smp_wmb();  <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tkr);
- * smp_wmb();  <- Ensure that the base[0] update is visible
- * tkf->seq++;
- * smp_wmb();  <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tkr);
- *
- * The reader side does:
- *
- * do {
- *     seq = tkf->seq;
- *     smp_rmb();
- *     idx = seq & 0x01;
- *     now = now(tkf->base[idx]);
- *     smp_rmb();
- * } while (seq != tkf->seq)
- *
- * As long as we update base[0] readers are forced off to
- * base[1]. Once base[0] is updated readers are redirected to base[0]
- * and the base[1] update takes place.
+ * Employ the latch technique; see raw_write_seqcount_latch().
   *
   * So if a NMI hits the update of base[0] then it will use base[1]
   * which is still consistent. In the worst case this can result is a
author	Peter Zijlstra <peterz@infradead.org>
	Wed, 27 May 2015 01:39:36 +0000 (11:09 +0930)
committer	Rusty Russell <rusty@rustcorp.com.au>
	Thu, 28 May 2015 02:02:04 +0000 (11:32 +0930)
include/linux/seqlock.h		patch \| blob \| history
kernel/time/timekeeping.c		patch \| blob \| history