4 * Copyright (c) 2016, Ericsson AB
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
40 #define MAX_MON_DOMAIN 64
41 #define MON_TIMEOUT 120000
42 #define MAX_PEER_DOWN_EVENTS 4
44 /* struct tipc_mon_domain: domain record to be transferred between peers
45 * @len: actual size of domain record
46 * @gen: current generation of sender's domain
47 * @ack_gen: most recent generation of self's domain acked by peer
48 * @member_cnt: number of domain member nodes described in this record
49 * @up_map: bit map indicating which of the members the sender considers up
50 * @members: identity of the domain members
52 struct tipc_mon_domain {
58 u32 members[MAX_MON_DOMAIN];
61 /* struct tipc_peer: state of a peer node and its domain
62 * @addr: tipc node identity of peer
63 * @head_map: shows which other nodes currently consider peer 'up'
64 * @domain: most recent domain record from peer
65 * @hash: position in hashed lookup list
66 * @list: position in linked list, in circular ascending order by 'addr'
67 * @applied: number of reported domain members applied on this monitor list
68 * @is_up: peer is up as seen from this node
69 * @is_head: peer is assigned domain head as seen from this node
70 * @is_local: peer is in local domain and should be continuously monitored
71 * @down_cnt: number of other peers which have reported this one as lost
75 struct tipc_mon_domain *domain;
76 struct hlist_node hash;
77 struct list_head list;
86 struct hlist_head peers[NODE_HTABLE_SIZE];
88 struct tipc_peer *self;
90 struct tipc_mon_domain cache;
94 struct timer_list timer;
95 unsigned long timer_intv;
98 static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
100 return tipc_net(net)->monitors[bearer_id];
103 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
105 /* dom_rec_len(): actual length of domain record for transport
107 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
109 return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
112 /* dom_size() : calculate size of own domain based on number of peers
114 static int dom_size(int peers)
118 while ((i * i) < peers)
120 return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
/* map_set(): set bit @i of an up-map to the value of @v
 * @up_map: 64-bit member bitmap, modified in place
 * @i: bit position, 0..63 (one bit per possible domain member)
 * @v: new value of the bit; callers are expected to pass 0 or 1
 */
static void map_set(u64 *up_map, int i, unsigned int v)
{
	/* Both the mask and the value must be widened to 64 bits before the
	 * shift: shifting a plain int is undefined for i >= 31 and can never
	 * reach the upper half of the map.
	 */
	*up_map &= ~(1ULL << i);
	*up_map |= ((u64)v << i);
}
/* map_get(): read bit @i of an up-map
 * @up_map: 64-bit member bitmap
 * @i: bit position, 0..63
 *
 * Returns 1 if the bit is set, otherwise 0.
 */
static int map_get(u64 up_map, int i)
{
	/* Use a 64-bit mask: an int-typed '1 << i' is undefined for i >= 31
	 * and would make the upper 32 members of the map unreadable.
	 */
	return (up_map & (1ULL << i)) >> i;
}
134 static struct tipc_peer *peer_prev(struct tipc_peer *peer)
136 return list_last_entry(&peer->list, struct tipc_peer, list);
139 static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
141 return list_first_entry(&peer->list, struct tipc_peer, list);
144 static struct tipc_peer *peer_head(struct tipc_peer *peer)
146 while (!peer->is_head)
147 peer = peer_prev(peer);
151 static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
153 struct tipc_peer *peer;
154 unsigned int thash = tipc_hashfn(addr);
156 hlist_for_each_entry(peer, &mon->peers[thash], hash) {
157 if (peer->addr == addr)
163 static struct tipc_peer *get_self(struct net *net, int bearer_id)
165 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
170 static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
172 struct tipc_net *tn = tipc_net(net);
174 return mon->peer_cnt > tn->mon_threshold;
177 /* mon_identify_lost_members() : - identify and mark potentially lost members
179 static void mon_identify_lost_members(struct tipc_peer *peer,
180 struct tipc_mon_domain *dom_bef,
183 struct tipc_peer *member = peer;
184 struct tipc_mon_domain *dom_aft = peer->domain;
185 int applied_aft = peer->applied;
188 for (i = 0; i < applied_bef; i++) {
189 member = peer_nxt(member);
191 /* Do nothing if self or peer already see member as down */
192 if (!member->is_up || !map_get(dom_bef->up_map, i))
195 /* Loss of local node must be detected by active probing */
196 if (member->is_local)
199 /* Start probing if member was removed from applied domain */
200 if (!applied_aft || (applied_aft < i)) {
201 member->down_cnt = 1;
205 /* Member loss is confirmed if it is still in applied domain */
206 if (!map_get(dom_aft->up_map, i))
211 /* mon_apply_domain() : match a peer's domain record against monitor list
213 static void mon_apply_domain(struct tipc_monitor *mon,
214 struct tipc_peer *peer)
216 struct tipc_mon_domain *dom = peer->domain;
217 struct tipc_peer *member;
221 if (!dom || !peer->is_up)
224 /* Scan across domain members and match against monitor list */
226 member = peer_nxt(peer);
227 for (i = 0; i < dom->member_cnt; i++) {
228 addr = dom->members[i];
229 if (addr != member->addr)
232 member = peer_nxt(member);
236 /* mon_update_local_domain() : update after peer addition/removal/up/down
238 static void mon_update_local_domain(struct tipc_monitor *mon)
240 struct tipc_peer *self = mon->self;
241 struct tipc_mon_domain *cache = &mon->cache;
242 struct tipc_mon_domain *dom = self->domain;
243 struct tipc_peer *peer = self;
244 u64 prev_up_map = dom->up_map;
248 /* Update local domain size based on current size of cluster */
249 member_cnt = dom_size(mon->peer_cnt) - 1;
250 self->applied = member_cnt;
252 /* Update native and cached outgoing local domain records */
253 dom->len = dom_rec_len(dom, member_cnt);
254 diff = dom->member_cnt != member_cnt;
255 dom->member_cnt = member_cnt;
256 for (i = 0; i < member_cnt; i++) {
257 peer = peer_nxt(peer);
258 diff |= dom->members[i] != peer->addr;
259 dom->members[i] = peer->addr;
260 map_set(&dom->up_map, i, peer->is_up);
261 cache->members[i] = htonl(peer->addr);
263 diff |= dom->up_map != prev_up_map;
266 dom->gen = ++mon->dom_gen;
267 cache->len = htons(dom->len);
268 cache->gen = htons(dom->gen);
269 cache->member_cnt = htons(member_cnt);
270 cache->up_map = cpu_to_be64(dom->up_map);
271 mon_apply_domain(mon, self);
274 /* mon_update_neighbors() : update preceding neighbors of added/removed peer
276 static void mon_update_neighbors(struct tipc_monitor *mon,
277 struct tipc_peer *peer)
281 dz = dom_size(mon->peer_cnt);
282 for (i = 0; i < dz; i++) {
283 mon_apply_domain(mon, peer);
284 peer = peer_prev(peer);
288 /* mon_assign_roles() : reassign peer roles after a network change
289 * The monitor list is consistent at this stage; i.e., each peer is monitoring
290 * a set of domain members as matched between domain record and the monitor list
292 static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
294 struct tipc_peer *peer = peer_nxt(head);
295 struct tipc_peer *self = mon->self;
298 for (; peer != self; peer = peer_nxt(peer)) {
299 peer->is_local = false;
301 /* Update domain member */
302 if (i++ < head->applied) {
303 peer->is_head = false;
305 peer->is_local = true;
308 /* Assign next domain head */
314 head->is_head = true;
320 void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
322 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
323 struct tipc_peer *self = get_self(net, bearer_id);
324 struct tipc_peer *peer, *prev, *head;
326 write_lock_bh(&mon->lock);
327 peer = get_peer(mon, addr);
330 prev = peer_prev(peer);
331 list_del(&peer->list);
332 hlist_del(&peer->hash);
336 head = peer_head(prev);
338 mon_update_local_domain(mon);
339 mon_update_neighbors(mon, prev);
341 /* Revert to full-mesh monitoring if we reach threshold */
342 if (!tipc_mon_is_active(net, mon)) {
343 list_for_each_entry(peer, &self->list, list) {
349 mon_assign_roles(mon, head);
351 write_unlock_bh(&mon->lock);
354 static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
355 struct tipc_peer **peer)
357 struct tipc_peer *self = mon->self;
358 struct tipc_peer *cur, *prev, *p;
360 p = kzalloc(sizeof(*p), GFP_ATOMIC);
366 /* Add new peer to lookup list */
367 INIT_LIST_HEAD(&p->list);
368 hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
370 /* Sort new peer into iterator list, in ascending circular order */
372 list_for_each_entry(cur, &self->list, list) {
373 if ((addr > prev->addr) && (addr < cur->addr))
375 if (((addr < cur->addr) || (addr > prev->addr)) &&
376 (prev->addr > cur->addr))
380 list_add_tail(&p->list, &cur->list);
382 mon_update_neighbors(mon, p);
386 void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
388 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
389 struct tipc_peer *self = get_self(net, bearer_id);
390 struct tipc_peer *peer, *head;
392 write_lock_bh(&mon->lock);
393 peer = get_peer(mon, addr);
394 if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
397 head = peer_head(peer);
399 mon_update_local_domain(mon);
400 mon_assign_roles(mon, head);
402 write_unlock_bh(&mon->lock);
405 void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
407 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
408 struct tipc_peer *self = get_self(net, bearer_id);
409 struct tipc_peer *peer, *head;
410 struct tipc_mon_domain *dom;
413 write_lock_bh(&mon->lock);
414 peer = get_peer(mon, addr);
416 pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
419 applied = peer->applied;
424 mon_identify_lost_members(peer, dom, applied);
427 peer->is_head = false;
428 peer->is_local = false;
430 head = peer_head(peer);
432 mon_update_local_domain(mon);
433 mon_assign_roles(mon, head);
435 write_unlock_bh(&mon->lock);
438 /* tipc_mon_rcv - process monitor domain event message
440 void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
441 struct tipc_mon_state *state, int bearer_id)
443 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
444 struct tipc_mon_domain *arrv_dom = data;
445 struct tipc_mon_domain dom_bef;
446 struct tipc_mon_domain *dom;
447 struct tipc_peer *peer;
448 u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
449 int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
450 u16 new_gen = ntohs(arrv_dom->gen);
451 u16 acked_gen = ntohs(arrv_dom->ack_gen);
452 bool probing = state->probing;
455 state->probing = false;
459 /* Sanity check received domain record */
460 if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
461 pr_warn_ratelimited("Received illegal domain record\n");
465 /* Synch generation numbers with peer if link just came up */
466 if (!state->synched) {
467 state->peer_gen = new_gen - 1;
468 state->acked_gen = acked_gen;
469 state->synched = true;
472 if (more(acked_gen, state->acked_gen))
473 state->acked_gen = acked_gen;
475 /* Drop duplicate unless we are waiting for a probe response */
476 if (!more(new_gen, state->peer_gen) && !probing)
479 write_lock_bh(&mon->lock);
480 peer = get_peer(mon, addr);
481 if (!peer || !peer->is_up)
484 /* Peer is confirmed, stop any ongoing probing */
487 /* Task is done for duplicate record */
488 if (!more(new_gen, state->peer_gen))
491 state->peer_gen = new_gen;
493 /* Cache current domain record for later use */
494 dom_bef.member_cnt = 0;
497 memcpy(&dom_bef, dom, dom->len);
499 /* Transform and store received domain record */
500 if (!dom || (dom->len < new_dlen)) {
502 dom = kmalloc(new_dlen, GFP_ATOMIC);
509 dom->member_cnt = new_member_cnt;
510 dom->up_map = be64_to_cpu(arrv_dom->up_map);
511 for (i = 0; i < new_member_cnt; i++)
512 dom->members[i] = ntohl(arrv_dom->members[i]);
514 /* Update peers affected by this domain record */
515 applied_bef = peer->applied;
516 mon_apply_domain(mon, peer);
517 mon_identify_lost_members(peer, &dom_bef, applied_bef);
518 mon_assign_roles(mon, peer_head(peer));
520 write_unlock_bh(&mon->lock);
523 void tipc_mon_prep(struct net *net, void *data, int *dlen,
524 struct tipc_mon_state *state, int bearer_id)
526 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
527 struct tipc_mon_domain *dom = data;
528 u16 gen = mon->dom_gen;
531 if (!tipc_mon_is_active(net, mon))
534 /* Send only a dummy record with ack if peer has acked our last sent */
535 if (likely(state->acked_gen == gen)) {
536 len = dom_rec_len(dom, 0);
538 dom->len = htons(len);
539 dom->gen = htons(gen);
540 dom->ack_gen = htons(state->peer_gen);
544 /* Send the full record */
545 read_lock_bh(&mon->lock);
546 len = ntohs(mon->cache.len);
548 memcpy(data, &mon->cache, len);
549 read_unlock_bh(&mon->lock);
550 dom->ack_gen = htons(state->peer_gen);
553 void tipc_mon_get_state(struct net *net, u32 addr,
554 struct tipc_mon_state *state,
557 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
558 struct tipc_peer *peer;
560 /* Use cached state if table has not changed */
561 if (!state->probing &&
562 (state->list_gen == mon->list_gen) &&
563 (state->acked_gen == mon->dom_gen))
566 read_lock_bh(&mon->lock);
567 peer = get_peer(mon, addr);
569 state->probing = state->acked_gen != mon->dom_gen;
570 state->probing |= peer->down_cnt;
571 state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
572 state->monitoring = peer->is_local;
573 state->monitoring |= peer->is_head;
574 state->list_gen = mon->list_gen;
576 read_unlock_bh(&mon->lock);
579 static void mon_timeout(unsigned long m)
581 struct tipc_monitor *mon = (void *)m;
582 struct tipc_peer *self;
583 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
585 write_lock_bh(&mon->lock);
587 if (self && (best_member_cnt != self->applied)) {
588 mon_update_local_domain(mon);
589 mon_assign_roles(mon, self);
591 write_unlock_bh(&mon->lock);
592 mod_timer(&mon->timer, jiffies + mon->timer_intv);
595 int tipc_mon_create(struct net *net, int bearer_id)
597 struct tipc_net *tn = tipc_net(net);
598 struct tipc_monitor *mon;
599 struct tipc_peer *self;
600 struct tipc_mon_domain *dom;
602 if (tn->monitors[bearer_id])
605 mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
606 self = kzalloc(sizeof(*self), GFP_ATOMIC);
607 dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
608 if (!mon || !self || !dom) {
614 tn->monitors[bearer_id] = mon;
615 rwlock_init(&mon->lock);
620 self->addr = tipc_own_addr(net);
622 self->is_head = true;
623 INIT_LIST_HEAD(&self->list);
624 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
625 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
626 mod_timer(&mon->timer, jiffies + mon->timer_intv);
630 void tipc_mon_delete(struct net *net, int bearer_id)
632 struct tipc_net *tn = tipc_net(net);
633 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
634 struct tipc_peer *self = get_self(net, bearer_id);
635 struct tipc_peer *peer, *tmp;
637 write_lock_bh(&mon->lock);
638 tn->monitors[bearer_id] = NULL;
639 list_for_each_entry_safe(peer, tmp, &self->list, list) {
640 list_del(&peer->list);
641 hlist_del(&peer->hash);
646 write_unlock_bh(&mon->lock);
647 del_timer_sync(&mon->timer);