.level = { &sname##_state.node[0] }, \
.rda = &sname##_data, \
.call = cr, \
- .fqs_state = RCU_GP_IDLE, \
+ .gp_state = RCU_GP_IDLE, \
.gpnum = 0UL - 300UL, \
.completed = 0UL - 300UL, \
.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
*/
void rcu_sched_qs(void)
{
+ unsigned long flags;
+
if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) {
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_sched_data.gpnum),
TPS("cpuqs"));
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+ return;
+ local_irq_save(flags);
if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) {
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(&rcu_sched_data),
true);
}
+ local_irq_restore(flags);
}
}
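The reworked rcu_sched_qs() above uses a cheap lockless test of the per-CPU ->cpu_no_qs.b.exp flag, then re-checks it with interrupts disabled so that the clear-and-report step cannot be interleaved with the IPI handler (sync_sched_exp_handler() later in this patch) that sets the flag. A minimal sketch of the idiom, using the hypothetical names some_exp_flag and report_exp_qs():

	static DEFINE_PER_CPU(bool, some_exp_flag);	/* hypothetical flag */

	static void note_exp_qs(void)
	{
		unsigned long flags;

		if (!__this_cpu_read(some_exp_flag))	/* Cheap fast-path test. */
			return;
		local_irq_save(flags);			/* Exclude the IPI handler. */
		if (__this_cpu_read(some_exp_flag)) {	/* Re-check, clear, report. */
			__this_cpu_write(some_exp_flag, false);
			report_exp_qs();		/* hypothetical report step */
		}
		local_irq_restore(flags);
	}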
*/
void rcu_note_context_switch(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
rcu_sched_qs();
rcu_preempt_note_context_switch();
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
rcu_momentary_dyntick_idle();
trace_rcu_utilization(TPS("End context switch"));
+ barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
* RCU flavors in desperate need of a quiescent state, which will normally
* be none of them). Either way, do a lightweight quiescent state for
* all RCU flavors.
+ *
+ * The barrier() calls are redundant in the common case when this is
+ * called externally, but are needed in case this is called from within
+ * this file.
+ *
*/
void rcu_all_qs(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking down. */
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_qs_ctr);
+ barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_all_qs);
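For the RCU-sched and RCU-bh flavors, read-side critical sections are bounded only by compiler-visible markers such as preempt_disable()/preempt_enable() and local_bh_disable()/local_bh_enable(), which is presumably why both functions above bracket their work with barrier(): the compiler must not slide a neighboring reader's accesses past the quiescent-state report. A typical caller is a long-running kernel loop (usually via cond_resched_rcu_qs(), which ends up here when cond_resched() has nothing to do). An illustration only, with a hypothetical struct item and process_item():

	static void scan_items(struct item *items, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			process_item(&items[i]);	/* may run for a long time */
			rcu_all_qs();			/* lightweight quiescent state */
		}
	}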
/*
* Do one round of quiescent-state forcing.
*/
-static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
+static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
{
- int fqs_state = fqs_state_in;
bool isidle = false;
unsigned long maxj;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
rsp->n_force_qs++;
- if (fqs_state == RCU_SAVE_DYNTICK) {
+ if (first_time) {
/* Collect dyntick-idle snapshots. */
if (is_sysidle_rcu_state(rsp)) {
isidle = true;
force_qs_rnp(rsp, dyntick_save_progress_counter,
&isidle, &maxj);
rcu_sysidle_report_gp(rsp, isidle, maxj);
- fqs_state = RCU_FORCE_QS;
} else {
/* Handle dyntick-idle and offline CPUs. */
isidle = true;
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
}
- return fqs_state;
}
/*
/* Declare grace period done. */
WRITE_ONCE(rsp->completed, rsp->gpnum);
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
- rsp->fqs_state = RCU_GP_IDLE;
+ rsp->gp_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
/* Advance CBs to reduce false positives below. */
needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
*/
static int __noreturn rcu_gp_kthread(void *arg)
{
- int fqs_state;
+ bool first_gp_fqs;
int gf;
unsigned long j;
int ret;
}
/* Handle quiescent-state forcing. */
- fqs_state = RCU_SAVE_DYNTICK;
+ first_gp_fqs = true;
j = jiffies_till_first_fqs;
if (j > HZ) {
j = HZ;
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsstart"));
- fqs_state = rcu_gp_fqs(rsp, fqs_state);
+ rcu_gp_fqs(rsp, first_gp_fqs);
+ first_gp_fqs = false;
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsend"));
* is expected to specify a CPU.
*/
static void
-__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+__call_rcu(struct rcu_head *head, rcu_callback_t func,
struct rcu_state *rsp, int cpu, bool lazy)
{
unsigned long flags;
/*
* Queue an RCU-sched callback for invocation after a grace period.
*/
-void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
/*
* Queue an RCU callback for invocation after a quicker grace period.
*/
-void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
* function may only be called from __kfree_rcu().
*/
void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
+ rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 1);
}
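The three signature changes above replace the open-coded callback type with rcu_callback_t, which this series defines in include/linux/types.h as a pointer to a function taking a struct rcu_head *. A usage sketch (not part of the patch) with a hypothetical struct foo:

	struct foo {
		int data;
		struct rcu_head rh;
	};

	static void foo_reclaim(struct rcu_head *head)	/* matches rcu_callback_t */
	{
		struct foo *fp = container_of(head, struct foo, rh);

		kfree(fp);	/* assumes fp was kmalloc()ed */
	}

	static void foo_retire(struct foo *fp)
	{
		call_rcu_sched(&fp->rh, foo_reclaim);	/* or call_rcu_bh() */
	}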
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();
- WARN_ON_ONCE((rnp->expmask & mask) != mask);
+ if (!(rnp->expmask & mask)) {
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
+ }
rnp->expmask &= ~mask;
__rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
}
}
/* Invoked on each online non-idle CPU for expedited quiescent state. */
-static void synchronize_sched_expedited_cpu_stop(void *data)
+static void sync_sched_exp_handler(void *data)
{
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp = data;
+
+ rdp = this_cpu_ptr(rsp->rda);
+ rnp = rdp->mynode;
+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+ __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+ return;
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
resched_cpu(smp_processor_id());
}
+/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+ struct rcu_data *rdp;
+ int ret;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp = &rcu_sched_state;
+
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ rnp = rdp->mynode;
+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+ return;
+ ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
+ WARN_ON_ONCE(ret);
+}
+
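The renamed sync_rcu_exp_select_cpus() below now takes the IPI handler as an smp_call_func_t parameter, so each RCU flavor can reuse the CPU-selection loop with its own per-CPU handler (sync_sched_exp_handler() above is the RCU-sched one, passed in further down). For reference, smp_call_func_t is the kernel's standard cross-call handler type:

	typedef void (*smp_call_func_t)(void *info);	/* from <linux/smp.h> */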
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
*/
-static void sync_sched_exp_select_cpus(struct rcu_state *rsp)
+static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
+ smp_call_func_t func)
{
int cpu;
unsigned long flags;
unsigned long mask;
unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi;
- struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
if (raw_smp_processor_id() == cpu ||
- cpu_is_offline(cpu) ||
!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
mask_ofl_test |= rdp->grpmask;
}
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
if (!(mask_ofl_ipi & mask))
continue;
- rdp = per_cpu_ptr(rsp->rda, cpu);
- ret = smp_call_function_single(cpu, synchronize_sched_expedited_cpu_stop, NULL, 0);
- if (!ret)
+retry_ipi:
+ ret = smp_call_function_single(cpu, func, rsp, 0);
+ if (!ret) {
mask_ofl_ipi &= ~mask;
+ } else {
+ /* Failed, raced with offline. */
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask)) {
+ raw_spin_unlock_irqrestore(&rnp->lock,
+ flags);
+ schedule_timeout_uninterruptible(1);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask))
+ goto retry_ipi;
+ raw_spin_lock_irqsave(&rnp->lock,
+ flags);
+ }
+ if (!(rnp->expmask & mask))
+ mask_ofl_ipi &= ~mask;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ }
}
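The retry logic above copes with a race against CPU-hotplug: smp_call_function_single() with wait=0 returns a nonzero error (for example -ENXIO) when the target CPU is not online. In that case the code rechecks under the rcu_node lock, sleeps a jiffy, and either retries the IPI or leaves the CPU's bit for the offline path below, which folds mask_ofl_ipi back into mask_ofl_test. The underlying pattern, sketched with a hypothetical fallback:

	static void poke_cpu(int cpu, smp_call_func_t func, void *arg)
	{
		if (smp_call_function_single(cpu, func, arg, 0))
			handle_offline_target(cpu);	/* hypothetical: IPI failed */
	}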
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
sync_rcu_preempt_exp_done(rnp_root));
return;
}
- pr_err("INFO: %s detected expedited stalls on CPUs: {",
+ pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
rcu_for_each_leaf_node(rsp, rnp) {
+ (void)rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+ struct rcu_data *rdp;
+
if (!(rnp->expmask & mask))
continue;
- pr_cont(" %d", cpu);
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ pr_cont(" %d-%c%c%c", cpu,
+ "O."[cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
}
mask <<= 1;
}
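Reading the pr_cont() format above, each stalled CPU is now printed with three status characters rather than a bare number: the first is 'O' if the CPU is offline and '.' if online, the second is 'o' if the CPU's bit is clear in the rcu_node's ->expmaskinit and '.' if set, and the third is 'N' if the bit is clear in ->expmaskinitnext and '.' if set. So a report such as

	INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 5-... } ...

would, assuming this reading of the format strings, mean that CPU 5 is online and was covered by both masks when the expedited grace period was initialized.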
return; /* Someone else did our work for us. */
rcu_exp_gp_seq_start(rsp);
- sync_sched_exp_select_cpus(rsp);
+ sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
synchronize_sched_expedited_wait(rsp);
rcu_exp_gp_seq_end(rsp);
break;
case CPU_ONLINE:
case CPU_DOWN_FAILED:
+ sync_sched_exp_online_cleanup(cpu);
rcu_boost_kthread_setaffinity(rnp, -1);
break;
case CPU_DOWN_PREPARE:
break;
case CPU_DYING_IDLE:
/* QS for any half-done expedited RCU-sched GP. */
- rcu_sched_qs();
+ preempt_disable();
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(rcu_sched_state.rda), true);
+ preempt_enable();
for_each_rcu_flavor(rsp) {
rcu_cleanup_dying_idle_cpu(cpu, rsp);
rcu_fanout_leaf, nr_cpu_ids);
/*
- * The boot-time rcu_fanout_leaf parameter is only permitted
- * to increase the leaf-level fanout, not decrease it. Of course,
- * the leaf-level fanout cannot exceed the number of bits in
- * the rcu_node masks. Complain and fall back to the compile-
- * time values if these limits are exceeded.
+ * The boot-time rcu_fanout_leaf parameter must be at least two
+ * and cannot exceed the number of bits in the rcu_node masks.
+ * Complain and fall back to the compile-time values if this
+ * limit is exceeded.
*/
- if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
+ if (rcu_fanout_leaf < 2 ||
rcu_fanout_leaf > sizeof(unsigned long) * 8) {
rcu_fanout_leaf = RCU_FANOUT_LEAF;
WARN_ON(1);
/*
* The tree must be able to accommodate the configured number of CPUs.
- * If this limit is exceeded than we have a serious problem elsewhere.
+ * If this limit is exceeded, fall back to the compile-time values.
*/
- if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1])
- panic("rcu_init_geometry: rcu_capacity[] is too small");
+ if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) {
+ rcu_fanout_leaf = RCU_FANOUT_LEAF;
+ WARN_ON(1);
+ return;
+ }
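For reference, rcu_capacity[i] elsewhere in this function is the number of CPUs that an (i+1)-level tree can cover: rcu_capacity[0] = rcu_fanout_leaf, and each additional level multiplies by RCU_FANOUT. With the common 64-bit defaults (assumed here) of rcu_fanout_leaf = 16 and RCU_FANOUT = 64, that works out to:

	rcu_capacity[0] = 16
	rcu_capacity[1] = 16 * 64    = 1024
	rcu_capacity[2] = 1024 * 64  = 65536
	rcu_capacity[3] = 65536 * 64 = 4194304

so the new fallback only fires when a boot-time rcu_fanout_leaf is small enough (now as low as 2) that even RCU_NUM_LVLS levels cannot cover nr_cpu_ids, in which case the code falls back to the compile-time geometry instead of panicking.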
/* Calculate the number of levels in the tree. */
for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {