/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/net.h>
#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
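/*
 * i.e. the number of whole partition-table blocks of 16-bit pkey
 * entries that fit in one directed-route SMP data payload.
 */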

#include "hfi.h"
#include "mad.h"
#include "trace.h"
#include "qp.h"

/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff

static int reply(struct ib_mad_hdr *smp)
{
        /*
         * The verbs framework will handle the directed/LID route
         * packet changes.
         */
        smp->method = IB_MGMT_METHOD_GET_RESP;
        if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
                smp->status |= IB_SMP_DIRECTION;
        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}

static inline void clear_opa_smp_data(struct opa_smp *smp)
{
        void *data = opa_get_smp_data(smp);
        size_t size = opa_get_smp_data_size(smp);

        memset(data, 0, size);
}

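/**
 * hfi1_event_pkey_change - raise an IB_EVENT_PKEY_CHANGE event
 * @dd: the device whose pkey table changed
 * @port: the IB port number (1-relative)
 */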
void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
{
        struct ib_event event;

        event.event = IB_EVENT_PKEY_CHANGE;
        event.device = &dd->verbs_dev.rdi.ibdev;
        event.element.port_num = port;
        ib_dispatch_event(&event);
}

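/*
 * Send @data to the SM as a trap MAD on the port's SM trap QP.
 * Sending is skipped unless the port is active (o14-3.2.1) and is
 * rate-limited by the trap-repress interval (o14-2).
 */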
static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
{
        struct ib_mad_send_buf *send_buf;
        struct ib_mad_agent *agent;
        struct opa_smp *smp;
        int ret;
        unsigned long flags;
        unsigned long timeout;
        int pkey_idx;
        u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;

        agent = ibp->rvp.send_agent;
        if (!agent)
                return;

        /* o14-3.2.1 */
        if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
                return;

        /* o14-2 */
        if (ibp->rvp.trap_timeout && time_before(jiffies,
                                                 ibp->rvp.trap_timeout))
                return;

        pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
        if (pkey_idx < 0) {
                pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
                        __func__, hfi1_get_pkey(ibp, 1));
                pkey_idx = 1;
        }

        send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
                                      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
        if (IS_ERR(send_buf))
                return;

        smp = send_buf->mad;
        smp->base_version = OPA_MGMT_BASE_VERSION;
        smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        smp->class_version = OPA_SMI_CLASS_VERSION;
        smp->method = IB_MGMT_METHOD_TRAP;
        ibp->rvp.tid++;
        smp->tid = cpu_to_be64(ibp->rvp.tid);
        smp->attr_id = IB_SMP_ATTR_NOTICE;
        /* o14-1: smp->mkey = 0; */
        memcpy(smp->route.lid.data, data, len);

        spin_lock_irqsave(&ibp->rvp.lock, flags);
        if (!ibp->rvp.sm_ah) {
                if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
                        struct ib_ah *ah;

                        ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
                        if (IS_ERR(ah)) {
                                ret = PTR_ERR(ah);
                        } else {
                                send_buf->ah = ah;
                                ibp->rvp.sm_ah = ibah_to_rvtah(ah);
                                ret = 0;
                        }
                } else {
                        ret = -EINVAL;
                }
        } else {
                send_buf->ah = &ibp->rvp.sm_ah->ibah;
                ret = 0;
        }
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);

        if (!ret)
                ret = ib_post_send_mad(send_buf, NULL);
        if (!ret) {
                /*
                 * Repress further traps for 4.096 usec * 2^subnet_timeout,
                 * computed here in usec (e.g. subnet_timeout == 18 gives
                 * roughly 1.07 seconds).
                 */
                timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
                ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
        } else {
                ib_free_send_mad(send_buf);
                ibp->rvp.trap_timeout = 0;
        }
}

/*
 * Send a bad [PQ]_Key trap (ch. 14.3.8).
 */
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
                    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
{
        struct opa_mad_notice_attr data;
        u32 lid = ppd_from_ibp(ibp)->lid;
        u32 _lid1 = lid1;
        u32 _lid2 = lid2;

        memset(&data, 0, sizeof(data));

        if (trap_num == OPA_TRAP_BAD_P_KEY)
                ibp->rvp.pkey_violations++;
        else
                ibp->rvp.qkey_violations++;
        ibp->rvp.n_pkt_drops++;

        /* Send violation trap */
        data.generic_type = IB_NOTICE_TYPE_SECURITY;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
        data.trap_num = trap_num;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
        data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
        data.ntc_257_258.key = cpu_to_be32(key);
        data.ntc_257_258.sl = sl << 3;
        data.ntc_257_258.qp1 = cpu_to_be32(qp1);
        data.ntc_257_258.qp2 = cpu_to_be32(qp2);

        send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a bad M_Key trap (ch. 14.3.9).
 */
static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
                     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
{
        struct opa_mad_notice_attr data;
        u32 lid = ppd_from_ibp(ibp)->lid;

        memset(&data, 0, sizeof(data));
        /* Send violation trap */
        data.generic_type = IB_NOTICE_TYPE_SECURITY;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
        data.trap_num = OPA_TRAP_BAD_M_KEY;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_256.lid = data.issuer_lid;
        data.ntc_256.method = mad->method;
        data.ntc_256.attr_id = mad->attr_id;
        data.ntc_256.attr_mod = mad->attr_mod;
        data.ntc_256.mkey = mkey;
        if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
                data.ntc_256.dr_slid = dr_slid;
                data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
                if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
                        data.ntc_256.dr_trunc_hop |=
                                IB_NOTICE_TRAP_DR_TRUNC;
                        hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
                }
                data.ntc_256.dr_trunc_hop |= hop_cnt;
                memcpy(data.ntc_256.dr_rtn_path, return_path,
                       hop_cnt);
        }

        send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a Port Capability Mask Changed trap (ch. 14.3.11).
 */
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
        struct opa_mad_notice_attr data;
        struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
        struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
        struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
        u32 lid = ppd_from_ibp(ibp)->lid;

        memset(&data, 0, sizeof(data));

        data.generic_type = IB_NOTICE_TYPE_INFO;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
        data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_144.lid = data.issuer_lid;
        data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);

        send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a System Image GUID Changed trap (ch. 14.3.12).
 */
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
{
        struct opa_mad_notice_attr data;
        u32 lid = ppd_from_ibp(ibp)->lid;

        memset(&data, 0, sizeof(data));

        data.generic_type = IB_NOTICE_TYPE_INFO;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
        data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
        data.ntc_145.lid = data.issuer_lid;

        send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a Node Description Changed trap (ch. 14.3.13).
 */
void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
{
        struct opa_mad_notice_attr data;
        u32 lid = ppd_from_ibp(ibp)->lid;

        memset(&data, 0, sizeof(data));

        data.generic_type = IB_NOTICE_TYPE_INFO;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
        data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_144.lid = data.issuer_lid;
        data.ntc_144.change_flags =
                cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);

        send_trap(ibp, &data, sizeof(data));
}

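/*
 * OPA Get(NodeDescription): copy the device's node description into
 * the reply.  Any nonzero attribute modifier is invalid here.
 */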
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
                                   u8 *data, struct ib_device *ibdev,
                                   u8 port, u32 *resp_len)
{
        struct opa_node_description *nd;

        if (am) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        nd = (struct opa_node_description *)data;

        memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));

        if (resp_len)
                *resp_len += sizeof(*nd);

        return reply((struct ib_mad_hdr *)smp);
}

static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
{
        struct opa_node_info *ni;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        unsigned pidx = port - 1; /* IB numbers ports from 1, hw from 0 */

        ni = (struct opa_node_info *)data;

        /* GUID 0 is illegal */
        if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
        ni->base_version = OPA_MGMT_BASE_VERSION;
        ni->class_version = OPA_SMI_CLASS_VERSION;
        ni->node_type = 1;     /* channel adapter */
        ni->num_ports = ibdev->phys_port_cnt;
        /* This is already in network order */
        ni->system_image_guid = ib_hfi1_sys_image_guid;
        /* Use first-port GUID as node */
        ni->node_guid = cpu_to_be64(dd->pport->guid);
        ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
        ni->device_id = cpu_to_be16(dd->pcidev->device);
        ni->revision = cpu_to_be32(dd->minrev);
        ni->local_port_num = port;
        ni->vendor_id[0] = dd->oui1;
        ni->vendor_id[1] = dd->oui2;
        ni->vendor_id[2] = dd->oui3;

        if (resp_len)
                *resp_len += sizeof(*ni);

        return reply((struct ib_mad_hdr *)smp);
}

static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
                             u8 port)
{
        struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        unsigned pidx = port - 1; /* IB numbers ports from 1, hw from 0 */

        /* GUID 0 is illegal */
        if (smp->attr_mod || pidx >= dd->num_pports ||
            dd->pport[pidx].guid == 0)
                smp->status |= IB_SMP_INVALID_FIELD;
        else
                nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);

        nip->base_version = OPA_MGMT_BASE_VERSION;
        nip->class_version = OPA_SMI_CLASS_VERSION;
        nip->node_type = 1;     /* channel adapter */
        nip->num_ports = ibdev->phys_port_cnt;
        /* This is already in network order */
        nip->sys_guid = ib_hfi1_sys_image_guid;
        /* Use first-port GUID as node */
        nip->node_guid = cpu_to_be64(dd->pport->guid);
        nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
        nip->device_id = cpu_to_be16(dd->pcidev->device);
        nip->revision = cpu_to_be32(dd->minrev);
        nip->local_port_num = port;
        nip->vendor_id[0] = dd->oui1;
        nip->vendor_id[1] = dd->oui2;
        nip->vendor_id[2] = dd->oui3;

        return reply((struct ib_mad_hdr *)smp);
}

static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
}

static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
}

static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
}

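/*
 * Validate the mkey of an incoming SMP against the port's mkey,
 * taking the lease timeout and mkeyprot level into account.  A
 * nonzero return means the check failed and a BadMKey trap may have
 * been queued; the caller is expected to drop the MAD.
 */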
static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
                      int mad_flags, __be64 mkey, __be32 dr_slid,
                      u8 return_path[], u8 hop_cnt)
{
        int valid_mkey = 0;
        int ret = 0;

        /* Is the mkey in the process of expiring? */
        if (ibp->rvp.mkey_lease_timeout &&
            time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
                /* Clear timeout and mkey protection field. */
                ibp->rvp.mkey_lease_timeout = 0;
                ibp->rvp.mkeyprot = 0;
        }

        if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 ||
            ibp->rvp.mkey == mkey)
                valid_mkey = 1;

        /* Unset lease timeout on any valid Get/Set/TrapRepress */
        if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
            (mad->method == IB_MGMT_METHOD_GET ||
             mad->method == IB_MGMT_METHOD_SET ||
             mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
                ibp->rvp.mkey_lease_timeout = 0;

        if (!valid_mkey) {
                switch (mad->method) {
                case IB_MGMT_METHOD_GET:
                        /* Bad mkey not a violation below level 2 */
                        if (ibp->rvp.mkeyprot < 2)
                                break;
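                        /* fall through */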
                case IB_MGMT_METHOD_SET:
                case IB_MGMT_METHOD_TRAP_REPRESS:
                        if (ibp->rvp.mkey_violations != 0xFFFF)
                                ++ibp->rvp.mkey_violations;
                        if (!ibp->rvp.mkey_lease_timeout &&
                            ibp->rvp.mkey_lease_period)
                                ibp->rvp.mkey_lease_timeout = jiffies +
                                        ibp->rvp.mkey_lease_period * HZ;
                        /* Generate a trap notice. */
                        bad_mkey(ibp, mad, mkey, dr_slid, return_path,
                                 hop_cnt);
                        ret = 1;
                }
        }

        return ret;
}

/*
 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 * (The LCB is unavailable in certain link states, for example.)
 */
struct lcb_datum {
        u32 off;
        u64 val;
};

static struct lcb_datum lcb_cache[] = {
        { DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
};

static int write_lcb_cache(u32 off, u64 val)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
                if (lcb_cache[i].off == off) {
                        lcb_cache[i].val = val;
                        return 0;
                }
        }

        pr_warn("%s bad offset 0x%x\n", __func__, off);
        return -1;
}

static int read_lcb_cache(u32 off, u64 *val)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
                if (lcb_cache[i].off == off) {
                        *val = lcb_cache[i].val;
                        return 0;
                }
        }

        pr_warn("%s bad offset 0x%x\n", __func__, off);
        return -1;
}

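/* Snapshot the LTP round-trip time counter while the LCB is readable. */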
void read_ltp_rtt(struct hfi1_devdata *dd)
{
        u64 reg;

        if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
                dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
        else
                write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
}

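/*
 * OPA Get(PortInfo): report the port's current configuration and
 * state.  The attribute modifier must select exactly one port.
 */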
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
{
        int i;
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        struct opa_port_info *pi = (struct opa_port_info *)data;
        u8 mtu;
        u8 credit_rate;
        u8 is_beaconing_active;
        u32 state;
        u32 num_ports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
        u32 buffer_units;
        u64 tmp = 0;

        if (num_ports != 1) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        dd = dd_from_ibdev(ibdev);
        /* IB numbers ports from 1, hw from 0 */
        ppd = dd->pport + (port - 1);
        ibp = &ppd->ibport_data;

        if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
            ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        pi->lid = cpu_to_be32(ppd->lid);

        /* Only return the mkey if the protection field allows it. */
        if (!(smp->method == IB_MGMT_METHOD_GET &&
              ibp->rvp.mkey != smp->mkey &&
              ibp->rvp.mkeyprot == 1))
                pi->mkey = ibp->rvp.mkey;

        pi->subnet_prefix = ibp->rvp.gid_prefix;
        pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
        pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
        pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
        pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
        pi->sa_qp = cpu_to_be32(ppd->sa_qp);

        pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
        pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
        pi->link_width.active = cpu_to_be16(ppd->link_width_active);

        pi->link_width_downgrade.supported =
                        cpu_to_be16(ppd->link_width_downgrade_supported);
        pi->link_width_downgrade.enabled =
                        cpu_to_be16(ppd->link_width_downgrade_enabled);
        pi->link_width_downgrade.tx_active =
                        cpu_to_be16(ppd->link_width_downgrade_tx_active);
        pi->link_width_downgrade.rx_active =
                        cpu_to_be16(ppd->link_width_downgrade_rx_active);

        pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
        pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
        pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);

        state = driver_lstate(ppd);

        if (start_of_sm_config && (state == IB_PORT_INIT))
                ppd->is_sm_config_started = 1;

        pi->port_phys_conf = (ppd->port_type & 0xf);

#if PI_LED_ENABLE_SUP
        pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
        pi->port_states.ledenable_offlinereason |=
                ppd->is_sm_config_started << 5;
        /*
         * This pairs with the memory barrier in hfi1_start_led_override to
         * ensure that we read the correct state of LED beaconing represented
         * by led_override_timer_active
         */
        smp_rmb();
        is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
        pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
        pi->port_states.ledenable_offlinereason |=
                ppd->offline_disabled_reason;
#else
        pi->port_states.offline_reason = ppd->neighbor_normal << 4;
        pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
        pi->port_states.offline_reason |= ppd->offline_disabled_reason;
#endif /* PI_LED_ENABLE_SUP */

        pi->port_states.portphysstate_portstate =
                (hfi1_ibphys_portstate(ppd) << 4) | state;

        pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;

        memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
        for (i = 0; i < ppd->vls_supported; i++) {
                mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
                if ((i % 2) == 0)
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
                else
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
        }
        /* don't forget VL 15 */
        mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
        pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
        pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
        pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
        pi->partenforce_filterraw |=
                (ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
        if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
        if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
        pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
        /* P_KeyViolations are counted by hardware. */
        pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
        pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);

        pi->vl.cap = ppd->vls_supported;
        pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
        pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
        pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);

        pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;

        pi->port_link_mode = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
                                         OPA_PORT_LINK_MODE_OPA << 5 |
                                         OPA_PORT_LINK_MODE_OPA);

        pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);

        pi->port_mode = cpu_to_be16(
                                ppd->is_active_optimize_enabled ?
                                        OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);

        pi->port_packet_format.supported =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
        pi->port_packet_format.enabled =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
        /* flit_control.interleave is (OPA V1, version .76):
         * bits         use
         * ----         ---
         * 2            res
         * 2            DistanceSupported
         * 2            DistanceEnabled
         * 5            MaxNestLevelTxEnabled
         * 5            MaxNestLevelRxSupported
         *
         * HFI supports only "distance mode 1" (see OPA V1, version .76,
         * section 9.6.2), so set DistanceSupported, DistanceEnabled
         * to 0x1; 0x1400 packs those two fields (bits 13:12 and 11:10)
         * and leaves every other field 0.
         */
        pi->flit_control.interleave = cpu_to_be16(0x1400);

        pi->link_down_reason = ppd->local_link_down_reason.sma;
        pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
        pi->port_error_action = cpu_to_be32(ppd->port_error_action);
        pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);

        /* 32.768 usec. response time (guessing) */
        pi->resptimevalue = 3;

        pi->local_port_num = port;

        /* buffer info for FM */
        pi->overall_buffer_space = cpu_to_be16(dd->link_credits);

        pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
        pi->neigh_port_num = ppd->neighbor_port_number;
        pi->port_neigh_mode =
                (ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
                (ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
                (ppd->neighbor_fm_security ?
                        OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);

        /* HFIs shall always return VL15 credits to their
         * neighbor in a timely manner, without any credit return pacing.
         */
        credit_rate = 0;
        buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
        buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
        buffer_units |= (credit_rate << 6) &
                                OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
        buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
        pi->buffer_units = cpu_to_be32(buffer_units);

        pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);

        /* HFI supports a replay buffer 128 LTPs in size */
        pi->replay_depth.buffer = 0x80;
        /* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
        read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);

        /*
         * this counter is 16 bits wide, but the replay_depth.wire
         * variable is only 8 bits
         */
        if (tmp > 0xff)
                tmp = 0xff;
        pi->replay_depth.wire = tmp;

        if (resp_len)
                *resp_len += sizeof(struct opa_port_info);

        return reply((struct ib_mad_hdr *)smp);
}

/**
 * get_pkeys - return the PKEY table
 * @dd: the hfi1_ib device
 * @port: the IB port number
 * @pkeys: the pkey table is placed here
 */
static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
{
        struct hfi1_pportdata *ppd = dd->pport + port - 1;

        memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));

        return 0;
}

static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
                                    u32 *resp_len)
{
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        u32 n_blocks_req = OPA_AM_NBLK(am);
        u32 start_block = am & 0x7ff;
        __be16 *p;
        u16 *q;
        int i;
        u16 n_blocks_avail;
        unsigned npkeys = hfi1_get_npkeys(dd);
        size_t size;

        if (n_blocks_req == 0) {
                pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
                        port, start_block, n_blocks_req);
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;

        size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);

        if (start_block + n_blocks_req > n_blocks_avail ||
            n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
                pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
                        start_block, n_blocks_req, n_blocks_avail,
                        OPA_NUM_PKEY_BLOCKS_PER_SMP);
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        p = (__be16 *)data;
        q = (u16 *)data;
        /* get the real pkeys if we are requesting the first block */
        if (start_block == 0) {
                get_pkeys(dd, port, q);
                for (i = 0; i < npkeys; i++)
                        p[i] = cpu_to_be16(q[i]);
                if (resp_len)
                        *resp_len += size;
        } else {
                smp->status |= IB_SMP_INVALID_FIELD;
        }
        return reply((struct ib_mad_hdr *)smp);
}

enum {
        HFI_TRANSITION_DISALLOWED,
        HFI_TRANSITION_IGNORED,
        HFI_TRANSITION_ALLOWED,
        HFI_TRANSITION_UNDEFINED,
};

/*
 * Use shortened names to improve readability of
 * {logical,physical}_state_transitions
 */
enum {
        __D = HFI_TRANSITION_DISALLOWED,
        __I = HFI_TRANSITION_IGNORED,
        __A = HFI_TRANSITION_ALLOWED,
        __U = HFI_TRANSITION_UNDEFINED,
};

/*
 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
 * represented in physical_state_transitions.
 */
#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)

/*
 * Within physical_state_transitions, rows represent "old" states,
 * columns "new" states, and physical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 6-4).
 */
static const struct {
        u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
} physical_state_transitions = {
        {
                /* 2    3    4    5    6    7    8    9   10   11 */
        /* 2 */ { __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
        /* 3 */ { __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
        /* 4 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 5 */ { __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
        /* 6 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 7 */ { __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
        /* 8 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 9 */ { __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
        /*10 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /*11 */ { __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
        }
};
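
/*
 * Example lookup: a request to move from physical state 3 to state 11
 * indexes allowed[3 - IB_PORTPHYSSTATE_POLLING][11 - IB_PORTPHYSSTATE_POLLING],
 * i.e. allowed[1][9] == __A above, so the transition is permitted.
 */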

/*
 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
 * in logical_state_transitions.
 */

#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)

/*
 * Within logical_state_transitions, rows represent "old" states,
 * columns "new" states, and logical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 9-12).
 */
static const struct {
        u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
} logical_state_transitions = {
        {
                /* 1    2    3    4    5 */
        /* 1 */ { __I, __D, __D, __D, __U},
        /* 2 */ { __D, __I, __A, __D, __U},
        /* 3 */ { __D, __D, __I, __A, __U},
        /* 4 */ { __D, __D, __I, __I, __U},
        /* 5 */ { __U, __U, __U, __U, __U},
        }
};
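
/*
 * Example lookups: Init (2) -> Armed (3) is allowed[2 - 1][3 - 1] == __A,
 * and Armed (3) -> Active (4) is allowed[3 - 1][4 - 1] == __A above.
 */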

static int logical_transition_allowed(int old, int new)
{
        if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
            new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
                pr_warn("invalid logical state(s) (old %d new %d)\n",
                        old, new);
                return HFI_TRANSITION_UNDEFINED;
        }

        if (new == IB_PORT_NOP)
                return HFI_TRANSITION_ALLOWED; /* always allowed */

        /* adjust states for indexing into logical_state_transitions */
        old -= IB_PORT_DOWN;
        new -= IB_PORT_DOWN;

        if (old < 0 || new < 0)
                return HFI_TRANSITION_UNDEFINED;
        return logical_state_transitions.allowed[old][new];
}

static int physical_transition_allowed(int old, int new)
{
        if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
            new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
                pr_warn("invalid physical state(s) (old %d new %d)\n",
                        old, new);
                return HFI_TRANSITION_UNDEFINED;
        }

        if (new == IB_PORTPHYSSTATE_NOP)
                return HFI_TRANSITION_ALLOWED; /* always allowed */

        /* adjust states for indexing into physical_state_transitions */
        old -= IB_PORTPHYSSTATE_POLLING;
        new -= IB_PORTPHYSSTATE_POLLING;

        if (old < 0 || new < 0)
                return HFI_TRANSITION_UNDEFINED;
        return physical_state_transitions.allowed[old][new];
}

static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
                                          u32 logical_new, u32 physical_new)
{
        u32 physical_old = driver_physical_state(ppd);
        u32 logical_old = driver_logical_state(ppd);
        int ret, logical_allowed, physical_allowed;

        ret = logical_transition_allowed(logical_old, logical_new);
        logical_allowed = ret;

        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                pr_warn("invalid logical state transition %s -> %s\n",
                        opa_lstate_name(logical_old),
                        opa_lstate_name(logical_new));
                return ret;
        }

        ret = physical_transition_allowed(physical_old, physical_new);
        physical_allowed = ret;

        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                pr_warn("invalid physical state transition %s -> %s\n",
                        opa_pstate_name(physical_old),
                        opa_pstate_name(physical_new));
                return ret;
        }

        if (logical_allowed == HFI_TRANSITION_IGNORED &&
            physical_allowed == HFI_TRANSITION_IGNORED)
                return HFI_TRANSITION_IGNORED;

        /*
         * A change request of Physical Port State from
         * 'Offline' to 'Polling' should be ignored.
         */
        if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
            (physical_new == IB_PORTPHYSSTATE_POLLING))
                return HFI_TRANSITION_IGNORED;

        /*
         * Either physical_allowed or logical_allowed is
         * HFI_TRANSITION_ALLOWED.
         */
        return HFI_TRANSITION_ALLOWED;
}

static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
                           u32 logical_state, u32 phys_state,
                           int suppress_idle_sma)
{
        struct hfi1_devdata *dd = ppd->dd;
        u32 link_state;
        int ret;

        ret = port_states_transition_allowed(ppd, logical_state, phys_state);
        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                /* error message emitted above */
                smp->status |= IB_SMP_INVALID_FIELD;
                return 0;
        }

        if (ret == HFI_TRANSITION_IGNORED)
                return 0;

        if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
            !(logical_state == IB_PORT_DOWN ||
              logical_state == IB_PORT_NOP)) {
                pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
                        logical_state, phys_state);
                smp->status |= IB_SMP_INVALID_FIELD;
        }

        /*
         * Logical state changes are summarized in OPAv1g1 spec.,
         * Table 9-12; physical state changes are summarized in
         * OPAv1g1 spec., Table 6.4.
         */
        switch (logical_state) {
        case IB_PORT_NOP:
                if (phys_state == IB_PORTPHYSSTATE_NOP)
                        break;
                /* FALLTHROUGH */
        case IB_PORT_DOWN:
                if (phys_state == IB_PORTPHYSSTATE_NOP) {
                        link_state = HLS_DN_DOWNDEF;
                } else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
                        link_state = HLS_DN_POLL;
                        set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
                                             0, OPA_LINKDOWN_REASON_FM_BOUNCE);
                } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
                        link_state = HLS_DN_DISABLE;
                } else {
                        pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
                                phys_state);
                        smp->status |= IB_SMP_INVALID_FIELD;
                        break;
                }

                if ((link_state == HLS_DN_POLL ||
                     link_state == HLS_DN_DOWNDEF)) {
                        /*
                         * Going to poll.  No matter what the current state,
                         * always move offline first, then tune and start the
                         * link.  This correctly handles a FM link bounce and
                         * a link enable.  Going offline is a no-op if already
                         * offline.
                         */
                        set_link_state(ppd, HLS_DN_OFFLINE);
                        tune_serdes(ppd);
                        start_link(ppd);
                } else {
                        set_link_state(ppd, link_state);
                }
                if (link_state == HLS_DN_DISABLE &&
                    (ppd->offline_disabled_reason >
                     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
                     ppd->offline_disabled_reason ==
                     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
                        ppd->offline_disabled_reason =
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
                /*
                 * Don't send a reply if the response would be sent
                 * through the disabled port.
                 */
                if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
                        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
                break;
        case IB_PORT_ARMED:
                ret = set_link_state(ppd, HLS_UP_ARMED);
                if ((ret == 0) && (suppress_idle_sma == 0))
                        send_idle_sma(dd, SMA_IDLE_ARM);
                break;
        case IB_PORT_ACTIVE:
                if (ppd->neighbor_normal) {
                        ret = set_link_state(ppd, HLS_UP_ACTIVE);
                        if (ret == 0)
                                send_idle_sma(dd, SMA_IDLE_ACTIVE);
                } else {
                        pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
                        smp->status |= IB_SMP_INVALID_FIELD;
                }
                break;
        default:
                pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
                        logical_state);
                smp->status |= IB_SMP_INVALID_FIELD;
        }

        return 0;
}

/**
 * __subn_set_opa_portinfo - set port information
 * @smp: the incoming SM packet
 * @am: the attribute modifier (must select a single port)
 * @data: the incoming OPA port info
 * @ibdev: the infiniband device
 * @port: the port on the device
 * @resp_len: incremented by the size of the response payload
 */
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
{
        struct opa_port_info *pi = (struct opa_port_info *)data;
        struct ib_event event;
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        u8 clientrereg;
        unsigned long flags;
        u32 smlid, opa_lid; /* tmp vars to hold LID values */
        u16 lid;
        u8 ls_old, ls_new, ps_new;
        u8 vls;
        u8 msl;
        u8 crc_enabled;
        u16 lse, lwe, mtu;
        u32 num_ports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
        int ret, i, invalid = 0, call_set_mtu = 0;
        int call_link_downgrade_policy = 0;

        if (num_ports != 1) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        opa_lid = be32_to_cpu(pi->lid);
        if (opa_lid & 0xFFFF0000) {
                pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
                smp->status |= IB_SMP_INVALID_FIELD;
                goto get_only;
        }

        lid = (u16)(opa_lid & 0x0000FFFF);

        smlid = be32_to_cpu(pi->sm_lid);
        if (smlid & 0xFFFF0000) {
                pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
                smp->status |= IB_SMP_INVALID_FIELD;
                goto get_only;
        }
        smlid &= 0x0000FFFF;

        clientrereg = (pi->clientrereg_subnettimeout &
                        OPA_PI_MASK_CLIENT_REREGISTER);

        dd = dd_from_ibdev(ibdev);
        /* IB numbers ports from 1, hw from 0 */
        ppd = dd->pport + (port - 1);
        ibp = &ppd->ibport_data;
        event.device = ibdev;
        event.element.port_num = port;

        ls_old = driver_lstate(ppd);

        ibp->rvp.mkey = pi->mkey;
        ibp->rvp.gid_prefix = pi->subnet_prefix;
        ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);

        /* Must be a valid unicast LID address. */
        if ((lid == 0 && ls_old > IB_PORT_INIT) ||
            lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
                        lid);
        } else if (ppd->lid != lid ||
                   ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
                if (ppd->lid != lid)
                        hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
                if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
                        hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
                hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
                event.event = IB_EVENT_LID_CHANGE;
                ib_dispatch_event(&event);
        }

        msl = pi->smsl & OPA_PI_MASK_SMSL;
        if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
                ppd->linkinit_reason =
                        (pi->partenforce_filterraw &
                         OPA_PI_MASK_LINKINIT_REASON);
        /* enable/disable SW pkey checking as per FM control */
        if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
                ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
        else
                ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;

        if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
                ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
        else
                ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;

        /* Must be a valid unicast LID address. */
        if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
            smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
        } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
                pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
                spin_lock_irqsave(&ibp->rvp.lock, flags);
                if (ibp->rvp.sm_ah) {
                        if (smlid != ibp->rvp.sm_lid)
                                ibp->rvp.sm_ah->attr.dlid = smlid;
                        if (msl != ibp->rvp.sm_sl)
                                ibp->rvp.sm_ah->attr.sl = msl;
                }
                spin_unlock_irqrestore(&ibp->rvp.lock, flags);
                if (smlid != ibp->rvp.sm_lid)
                        ibp->rvp.sm_lid = smlid;
                if (msl != ibp->rvp.sm_sl)
                        ibp->rvp.sm_sl = msl;
                event.event = IB_EVENT_SM_CHANGE;
                ib_dispatch_event(&event);
        }

        if (pi->link_down_reason == 0) {
                ppd->local_link_down_reason.sma = 0;
                ppd->local_link_down_reason.latest = 0;
        }

        if (pi->neigh_link_down_reason == 0) {
                ppd->neigh_link_down_reason.sma = 0;
                ppd->neigh_link_down_reason.latest = 0;
        }

        ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
        ppd->sa_qp = be32_to_cpu(pi->sa_qp);

        ppd->port_error_action = be32_to_cpu(pi->port_error_action);
        lwe = be16_to_cpu(pi->link_width.enabled);
        if (lwe) {
                if (lwe == OPA_LINK_WIDTH_RESET ||
                    lwe == OPA_LINK_WIDTH_RESET_OLD)
                        set_link_width_enabled(ppd, ppd->link_width_supported);
                else if ((lwe & ~ppd->link_width_supported) == 0)
                        set_link_width_enabled(ppd, lwe);
                else
                        smp->status |= IB_SMP_INVALID_FIELD;
        }
        lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
        /* LWD.E is always applied - 0 means "disabled" */
        if (lwe == OPA_LINK_WIDTH_RESET ||
            lwe == OPA_LINK_WIDTH_RESET_OLD) {
                set_link_width_downgrade_enabled(ppd,
                                ppd->link_width_downgrade_supported);
        } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
                /* only set and apply if something changed */
                if (lwe != ppd->link_width_downgrade_enabled) {
                        set_link_width_downgrade_enabled(ppd, lwe);
                        call_link_downgrade_policy = 1;
                }
        } else {
                smp->status |= IB_SMP_INVALID_FIELD;
        }
        lse = be16_to_cpu(pi->link_speed.enabled);
        if (lse) {
                if (lse & be16_to_cpu(pi->link_speed.supported))
                        set_link_speed_enabled(ppd, lse);
                else
                        smp->status |= IB_SMP_INVALID_FIELD;
        }

        ibp->rvp.mkeyprot =
                (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
        ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
                              ibp->rvp.vl_high_limit);

        if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
            ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
        for (i = 0; i < ppd->vls_supported; i++) {
                if ((i % 2) == 0)
                        mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
                                           4) & 0xF);
                else
                        mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
                                          0xF);
                if (mtu == 0xffff) {
                        pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
                                mtu,
                                (pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
                        smp->status |= IB_SMP_INVALID_FIELD;
                        mtu = hfi1_max_mtu; /* use a valid MTU */
                }
                if (dd->vld[i].mtu != mtu) {
                        dd_dev_info(dd,
                                    "MTU change on vl %d from %d to %d\n",
                                    i, dd->vld[i].mtu, mtu);
                        dd->vld[i].mtu = mtu;
                        call_set_mtu++;
                }
        }
        /*
         * Per the OPAv1 spec, VL15 must be configured to operate with
         * an MTU of 2048 or larger.
         */
1274         mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1275         if (mtu < 2048 || mtu == 0xffff)
1276                 mtu = 2048;
1277         if (dd->vld[15].mtu != mtu) {
1278                 dd_dev_info(dd,
1279                             "MTU change on vl 15 from %d to %d\n",
1280                             dd->vld[15].mtu, mtu);
1281                 dd->vld[15].mtu = mtu;
1282                 call_set_mtu++;
1283         }
1284         if (call_set_mtu)
1285                 set_mtu(ppd);
1286
1287         /* Set operational VLs */
1288         vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1289         if (vls) {
1290                 if (vls > ppd->vls_supported) {
1291                         pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1292                                 pi->operational_vls);
1293                         smp->status |= IB_SMP_INVALID_FIELD;
1294                 } else {
1295                         if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1296                                             vls) == -EINVAL)
1297                                 smp->status |= IB_SMP_INVALID_FIELD;
1298                 }
1299         }
1300
1301         if (pi->mkey_violations == 0)
1302                 ibp->rvp.mkey_violations = 0;
1303
1304         if (pi->pkey_violations == 0)
1305                 ibp->rvp.pkey_violations = 0;
1306
1307         if (pi->qkey_violations == 0)
1308                 ibp->rvp.qkey_violations = 0;
1309
1310         ibp->rvp.subnet_timeout =
1311                 pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1312
1313         crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1314         crc_enabled >>= 4;
1315         crc_enabled &= 0xf;
1316
1317         if (crc_enabled != 0)
1318                 ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1319
1320         ppd->is_active_optimize_enabled =
1321                         !!(be16_to_cpu(pi->port_mode)
1322                                         & OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1323
1324         ls_new = pi->port_states.portphysstate_portstate &
1325                         OPA_PI_MASK_PORT_STATE;
1326         ps_new = (pi->port_states.portphysstate_portstate &
1327                         OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1328
1329         if (ls_old == IB_PORT_INIT) {
1330                 if (start_of_sm_config) {
1331                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1332                                 ppd->is_sm_config_started = 1;
1333                 } else if (ls_new == IB_PORT_ARMED) {
1334                         if (ppd->is_sm_config_started == 0)
1335                                 invalid = 1;
1336                 }
1337         }
1338
1339         /* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1340         if (clientrereg) {
1341                 event.event = IB_EVENT_CLIENT_REREGISTER;
1342                 ib_dispatch_event(&event);
1343         }
1344
1345         /*
1346          * Do the port state change now that the other link parameters
1347          * have been set.
1348          * Changing the port physical state only makes sense if the link
1349          * is down or is being set to down.
1350          */
1351
1352         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1353         if (ret)
1354                 return ret;
1355
1356         ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1357
1358         /* restore re-reg bit per o14-12.2.1 */
1359         pi->clientrereg_subnettimeout |= clientrereg;
1360
1361         /*
1362          * Apply the new link downgrade policy.  This may result in a link
1363          * bounce.  Do this after everything else so things are settled.
1364          * Possible problem: if setting the port state above fails, then
1365          * the policy change is not applied.
1366          */
1367         if (call_link_downgrade_policy)
1368                 apply_link_downgrade_policy(ppd, 0);
1369
1370         return ret;
1371
1372 get_only:
1373         return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1374 }
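/*
 * Note the pattern used by the SubnSet handlers in this file: on
 * success each one finishes by calling the matching __subn_get_*()
 * so the GetResp payload reflects the state actually programmed,
 * not merely the state the SM requested.
 */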
1375
1376 /**
1377  * set_pkeys - set the PKEY table for ctxt 0
1378  * @dd: the hfi1_ib device
1379  * @port: the IB port number
1380  * @pkeys: the PKEY table
1381  */
1382 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1383 {
1384         struct hfi1_pportdata *ppd;
1385         int i;
1386         int changed = 0;
1387         int update_includes_mgmt_partition = 0;
1388
1389         /*
1390          * IB ports one/two always map to contexts zero/one, which are
1391          * always kernel contexts, so no locking is needed.
1392          * If we get here with ppd set up, there is no need to check
1393          * that rcd is valid.
1394          */
1395         ppd = dd->pport + (port - 1);
1396         /*
1397          * If the update does not include the management pkey, don't do it.
1398          */
1399         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1400                 if (pkeys[i] == LIM_MGMT_P_KEY) {
1401                         update_includes_mgmt_partition = 1;
1402                         break;
1403                 }
1404         }
1405
1406         if (!update_includes_mgmt_partition)
1407                 return 1;
1408
1409         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1410                 u16 key = pkeys[i];
1411                 u16 okey = ppd->pkeys[i];
1412
1413                 if (key == okey)
1414                         continue;
1415                 /*
1416                  * Don't update pkeys[2] when the neighbor is a switch and
1417                  * MgmtAllowed is not set for this HFI port.
1418                  */
1419                 if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
1420                         continue;
1421                 /*
1422                  * The SM gives us the complete PKey table. We have
1423                  * to ensure that we put the PKeys in the matching
1424                  * slots.
1425                  */
1426                 ppd->pkeys[i] = key;
1427                 changed = 1;
1428         }
1429
1430         if (changed) {
1431                 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1432                 hfi1_event_pkey_change(dd, port);
1433         }
1434
1435         return 0;
1436 }
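/*
 * Illustrative sketch (not part of the driver): the high bit of a
 * PKey encodes membership (1 = full, 0 = limited), so LIM_MGMT_P_KEY
 * and the full management PKey share the same 15-bit base. A minimal
 * helper comparing base PKeys while ignoring the membership bit:
 */
static inline int pkey_base_match(u16 a, u16 b)
{
        /* mask off bit 15, the full/limited membership bit */
        return (a & 0x7fff) == (b & 0x7fff);
}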
1437
1438 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1439                                     struct ib_device *ibdev, u8 port,
1440                                     u32 *resp_len)
1441 {
1442         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1443         u32 n_blocks_sent = OPA_AM_NBLK(am);
1444         u32 start_block = am & 0x7ff;
1445         u16 *p = (u16 *)data;
1446         __be16 *q = (__be16 *)data;
1447         int i;
1448         u16 n_blocks_avail;
1449         unsigned npkeys = hfi1_get_npkeys(dd);
1450
1451         if (n_blocks_sent == 0) {
1452                 pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1453                         port, start_block, n_blocks_sent);
1454                 smp->status |= IB_SMP_INVALID_FIELD;
1455                 return reply((struct ib_mad_hdr *)smp);
1456         }
1457
1458         n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1459
1460         if (start_block + n_blocks_sent > n_blocks_avail ||
1461             n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1462                 pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1463                         start_block, n_blocks_sent, n_blocks_avail,
1464                         OPA_NUM_PKEY_BLOCKS_PER_SMP);
1465                 smp->status |= IB_SMP_INVALID_FIELD;
1466                 return reply((struct ib_mad_hdr *)smp);
1467         }
1468
1469         for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1470                 p[i] = be16_to_cpu(q[i]);
1471
1472         if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1473                 smp->status |= IB_SMP_INVALID_FIELD;
1474                 return reply((struct ib_mad_hdr *)smp);
1475         }
1476
1477         return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1478 }
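/*
 * Illustrative sketch (not part of the driver): the Set(PKeyTable)
 * payload above arrives big-endian and is byte-swapped to CPU order
 * in place (p and q alias the same buffer). An equivalent standalone
 * conversion over 'count' 16-bit words:
 */
static inline void be16_table_to_cpu(u16 *dst, const __be16 *src, int count)
{
        int i;

        for (i = 0; i < count; i++)
                dst[i] = be16_to_cpu(src[i]);
}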
1479
1480 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1481 {
1482         u64 *val = data;
1483
1484         *val++ = read_csr(dd, SEND_SC2VLT0);
1485         *val++ = read_csr(dd, SEND_SC2VLT1);
1486         *val++ = read_csr(dd, SEND_SC2VLT2);
1487         *val++ = read_csr(dd, SEND_SC2VLT3);
1488         return 0;
1489 }
1490
1491 #define ILLEGAL_VL 12
1492 /*
1493  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1494  * for SC15, which must map to VL15). If we don't remap things this
1495  * way it is possible for VL15 counters to increment when we try to
1496  * send on a SC which is mapped to an invalid VL.
1497  */
1498 static void filter_sc2vlt(void *data)
1499 {
1500         int i;
1501         u8 *pd = data;
1502
1503         for (i = 0; i < OPA_MAX_SCS; i++) {
1504                 if (i == 15)
1505                         continue;
1506                 if ((pd[i] & 0x1f) == 0xf)
1507                         pd[i] = ILLEGAL_VL;
1508         }
1509 }
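/*
 * Worked example: if pd[3] arrives as 0x0f (SC3 mapped to VL15), the
 * loop above rewrites it to ILLEGAL_VL (12); i == 15 is skipped, so
 * SC15 keeps its mandatory VL15 mapping.
 */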
1510
1511 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1512 {
1513         u64 *val = data;
1514
1515         filter_sc2vlt(data);
1516
1517         write_csr(dd, SEND_SC2VLT0, *val++);
1518         write_csr(dd, SEND_SC2VLT1, *val++);
1519         write_csr(dd, SEND_SC2VLT2, *val++);
1520         write_csr(dd, SEND_SC2VLT3, *val++);
1521         write_seqlock_irq(&dd->sc2vl_lock);
1522         memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1523         write_sequnlock_irq(&dd->sc2vl_lock);
1524         return 0;
1525 }
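/*
 * Illustrative sketch (not part of the driver): a lockless reader of
 * dd->sc2vl pairs with the write_seqlock_irq() above by retrying its
 * copy whenever a writer raced with it:
 */
static inline void sc2vl_snapshot(struct hfi1_devdata *dd, void *snap)
{
        unsigned int seq;

        do {
                seq = read_seqbegin(&dd->sc2vl_lock);
                memcpy(snap, dd->sc2vl, sizeof(dd->sc2vl));
        } while (read_seqretry(&dd->sc2vl_lock, seq));
}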
1526
1527 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1528                                    struct ib_device *ibdev, u8 port,
1529                                    u32 *resp_len)
1530 {
1531         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1532         u8 *p = data;
1533         size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1534         unsigned i;
1535
1536         if (am) {
1537                 smp->status |= IB_SMP_INVALID_FIELD;
1538                 return reply((struct ib_mad_hdr *)smp);
1539         }
1540
1541         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1542                 *p++ = ibp->sl_to_sc[i];
1543
1544         if (resp_len)
1545                 *resp_len += size;
1546
1547         return reply((struct ib_mad_hdr *)smp);
1548 }
1549
1550 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1551                                    struct ib_device *ibdev, u8 port,
1552                                    u32 *resp_len)
1553 {
1554         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1555         u8 *p = data;
1556         int i;
1557         u8 sc;
1558
1559         if (am) {
1560                 smp->status |= IB_SMP_INVALID_FIELD;
1561                 return reply((struct ib_mad_hdr *)smp);
1562         }
1563
1564         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++) {
1565                 sc = *p++;
1566                 if (ibp->sl_to_sc[i] != sc) {
1567                         ibp->sl_to_sc[i] = sc;
1568
1569                         /* Put all stale qps into error state */
1570                         hfi1_error_port_qps(ibp, i);
1571                 }
1572         }
1573
1574         return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1575 }
1576
1577 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1578                                    struct ib_device *ibdev, u8 port,
1579                                    u32 *resp_len)
1580 {
1581         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1582         u8 *p = data;
1583         size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1584         unsigned i;
1585
1586         if (am) {
1587                 smp->status |= IB_SMP_INVALID_FIELD;
1588                 return reply((struct ib_mad_hdr *)smp);
1589         }
1590
1591         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1592                 *p++ = ibp->sc_to_sl[i];
1593
1594         if (resp_len)
1595                 *resp_len += size;
1596
1597         return reply((struct ib_mad_hdr *)smp);
1598 }
1599
1600 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1601                                    struct ib_device *ibdev, u8 port,
1602                                    u32 *resp_len)
1603 {
1604         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1605         u8 *p = data;
1606         int i;
1607
1608         if (am) {
1609                 smp->status |= IB_SMP_INVALID_FIELD;
1610                 return reply((struct ib_mad_hdr *)smp);
1611         }
1612
1613         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1614                 ibp->sc_to_sl[i] = *p++;
1615
1616         return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1617 }
1618
1619 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1620                                     struct ib_device *ibdev, u8 port,
1621                                     u32 *resp_len)
1622 {
1623         u32 n_blocks = OPA_AM_NBLK(am);
1624         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1625         void *vp = (void *)data;
1626         size_t size = 4 * sizeof(u64);
1627
1628         if (n_blocks != 1) {
1629                 smp->status |= IB_SMP_INVALID_FIELD;
1630                 return reply((struct ib_mad_hdr *)smp);
1631         }
1632
1633         get_sc2vlt_tables(dd, vp);
1634
1635         if (resp_len)
1636                 *resp_len += size;
1637
1638         return reply((struct ib_mad_hdr *)smp);
1639 }
1640
1641 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1642                                     struct ib_device *ibdev, u8 port,
1643                                     u32 *resp_len)
1644 {
1645         u32 n_blocks = OPA_AM_NBLK(am);
1646         int async_update = OPA_AM_ASYNC(am);
1647         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1648         void *vp = (void *)data;
1649         struct hfi1_pportdata *ppd;
1650         int lstate;
1651
1652         if (n_blocks != 1 || async_update) {
1653                 smp->status |= IB_SMP_INVALID_FIELD;
1654                 return reply((struct ib_mad_hdr *)smp);
1655         }
1656
1657         /* IB numbers ports from 1, hw from 0 */
1658         ppd = dd->pport + (port - 1);
1659         lstate = driver_lstate(ppd);
1660         /*
1661          * it's known that async_update is 0 by this point, but include
1662          * the explicit check for clarity
1663          */
1664         if (!async_update &&
1665             (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1666                 smp->status |= IB_SMP_INVALID_FIELD;
1667                 return reply((struct ib_mad_hdr *)smp);
1668         }
1669
1670         set_sc2vlt_tables(dd, vp);
1671
1672         return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1673 }
1674
1675 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1676                                      struct ib_device *ibdev, u8 port,
1677                                      u32 *resp_len)
1678 {
1679         u32 n_blocks = OPA_AM_NPORT(am);
1680         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1681         struct hfi1_pportdata *ppd;
1682         void *vp = (void *)data;
1683         int size;
1684
1685         if (n_blocks != 1) {
1686                 smp->status |= IB_SMP_INVALID_FIELD;
1687                 return reply((struct ib_mad_hdr *)smp);
1688         }
1689
1690         ppd = dd->pport + (port - 1);
1691
1692         size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1693
1694         if (resp_len)
1695                 *resp_len += size;
1696
1697         return reply((struct ib_mad_hdr *)smp);
1698 }
1699
1700 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1701                                      struct ib_device *ibdev, u8 port,
1702                                      u32 *resp_len)
1703 {
1704         u32 n_blocks = OPA_AM_NPORT(am);
1705         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1706         struct hfi1_pportdata *ppd;
1707         void *vp = (void *)data;
1708         int lstate;
1709
1710         if (n_blocks != 1) {
1711                 smp->status |= IB_SMP_INVALID_FIELD;
1712                 return reply((struct ib_mad_hdr *)smp);
1713         }
1714
1715         /* IB numbers ports from 1, hw from 0 */
1716         ppd = dd->pport + (port - 1);
1717         lstate = driver_lstate(ppd);
1718         if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1719                 smp->status |= IB_SMP_INVALID_FIELD;
1720                 return reply((struct ib_mad_hdr *)smp);
1721         }
1722
1725         fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1726
1727         return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1728                                          resp_len);
1729 }
1730
1731 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1732                               struct ib_device *ibdev, u8 port,
1733                               u32 *resp_len)
1734 {
1735         u32 nports = OPA_AM_NPORT(am);
1736         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1737         u32 lstate;
1738         struct hfi1_ibport *ibp;
1739         struct hfi1_pportdata *ppd;
1740         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1741
1742         if (nports != 1) {
1743                 smp->status |= IB_SMP_INVALID_FIELD;
1744                 return reply((struct ib_mad_hdr *)smp);
1745         }
1746
1747         ibp = to_iport(ibdev, port);
1748         ppd = ppd_from_ibp(ibp);
1749
1750         lstate = driver_lstate(ppd);
1751
1752         if (start_of_sm_config && (lstate == IB_PORT_INIT))
1753                 ppd->is_sm_config_started = 1;
1754
1755 #if PI_LED_ENABLE_SUP
1756         psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1757         psi->port_states.ledenable_offlinereason |=
1758                 ppd->is_sm_config_started << 5;
1759         psi->port_states.ledenable_offlinereason |=
1760                 ppd->offline_disabled_reason;
1761 #else
1762         psi->port_states.offline_reason = ppd->neighbor_normal << 4;
1763         psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
1764         psi->port_states.offline_reason |= ppd->offline_disabled_reason;
1765 #endif /* PI_LED_ENABLE_SUP */
1766
1767         psi->port_states.portphysstate_portstate =
1768                 (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1769         psi->link_width_downgrade_tx_active =
1770                 cpu_to_be16(ppd->link_width_downgrade_tx_active);
1771         psi->link_width_downgrade_rx_active =
1772                 cpu_to_be16(ppd->link_width_downgrade_rx_active);
1773         if (resp_len)
1774                 *resp_len += sizeof(struct opa_port_state_info);
1775
1776         return reply((struct ib_mad_hdr *)smp);
1777 }
1778
1779 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1780                               struct ib_device *ibdev, u8 port,
1781                               u32 *resp_len)
1782 {
1783         u32 nports = OPA_AM_NPORT(am);
1784         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1785         u32 ls_old;
1786         u8 ls_new, ps_new;
1787         struct hfi1_ibport *ibp;
1788         struct hfi1_pportdata *ppd;
1789         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1790         int ret, invalid = 0;
1791
1792         if (nports != 1) {
1793                 smp->status |= IB_SMP_INVALID_FIELD;
1794                 return reply((struct ib_mad_hdr *)smp);
1795         }
1796
1797         ibp = to_iport(ibdev, port);
1798         ppd = ppd_from_ibp(ibp);
1799
1800         ls_old = driver_lstate(ppd);
1801
1802         ls_new = port_states_to_logical_state(&psi->port_states);
1803         ps_new = port_states_to_phys_state(&psi->port_states);
1804
1805         if (ls_old == IB_PORT_INIT) {
1806                 if (start_of_sm_config) {
1807                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1808                                 ppd->is_sm_config_started = 1;
1809                 } else if (ls_new == IB_PORT_ARMED) {
1810                         if (ppd->is_sm_config_started == 0)
1811                                 invalid = 1;
1812                 }
1813         }
1814
1815         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1816         if (ret)
1817                 return ret;
1818
1819         if (invalid)
1820                 smp->status |= IB_SMP_INVALID_FIELD;
1821
1822         return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1823 }
1824
1825 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1826                                      struct ib_device *ibdev, u8 port,
1827                                      u32 *resp_len)
1828 {
1829         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1830         u32 addr = OPA_AM_CI_ADDR(am);
1831         u32 len = OPA_AM_CI_LEN(am) + 1;
1832         int ret;
1833
1834 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1835 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1836 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1837
1838         /*
1839          * check that addr is within spec, and
1840          * addr and (addr + len - 1) are on the same "page"
1841          */
1842         if (addr >= 4096 ||
1843             (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1844                 smp->status |= IB_SMP_INVALID_FIELD;
1845                 return reply((struct ib_mad_hdr *)smp);
1846         }
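        /*
         * Worked example: addr = 120, len = 16 spans bytes 120..135;
         * __CI_PAGE_NUM(120) == 0 but __CI_PAGE_NUM(135) == 128, so
         * that request crosses a 128-byte page and is rejected above.
         */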
1847
1848         ret = get_cable_info(dd, port, addr, len, data);
1849
1850         if (ret == -ENODEV) {
1851                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1852                 return reply((struct ib_mad_hdr *)smp);
1853         }
1854
1855         /* The address range for the CableInfo SMA query is wider than the
1856          * memory available on the QSFP cable. We want to return a valid
1857          * response, albeit zeroed out, for address ranges that are beyond
1858          * the available memory but still within the CableInfo query spec.
1859          */
1860         if (ret < 0 && ret != -ERANGE) {
1861                 smp->status |= IB_SMP_INVALID_FIELD;
1862                 return reply((struct ib_mad_hdr *)smp);
1863         }
1864
1865         if (resp_len)
1866                 *resp_len += len;
1867
1868         return reply((struct ib_mad_hdr *)smp);
1869 }
1870
1871 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1872                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1873 {
1874         u32 num_ports = OPA_AM_NPORT(am);
1875         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1876         struct hfi1_pportdata *ppd;
1877         struct buffer_control *p = (struct buffer_control *)data;
1878         int size;
1879
1880         if (num_ports != 1) {
1881                 smp->status |= IB_SMP_INVALID_FIELD;
1882                 return reply((struct ib_mad_hdr *)smp);
1883         }
1884
1885         ppd = dd->pport + (port - 1);
1886         size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1887         trace_bct_get(dd, p);
1888         if (resp_len)
1889                 *resp_len += size;
1890
1891         return reply((struct ib_mad_hdr *)smp);
1892 }
1893
1894 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1895                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1896 {
1897         u32 num_ports = OPA_AM_NPORT(am);
1898         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1899         struct hfi1_pportdata *ppd;
1900         struct buffer_control *p = (struct buffer_control *)data;
1901
1902         if (num_ports != 1) {
1903                 smp->status |= IB_SMP_INVALID_FIELD;
1904                 return reply((struct ib_mad_hdr *)smp);
1905         }
1906         ppd = dd->pport + (port - 1);
1907         trace_bct_set(dd, p);
1908         if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1909                 smp->status |= IB_SMP_INVALID_FIELD;
1910                 return reply((struct ib_mad_hdr *)smp);
1911         }
1912
1913         return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1914 }
1915
1916 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1917                                  struct ib_device *ibdev, u8 port,
1918                                  u32 *resp_len)
1919 {
1920         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1921         u32 num_ports = OPA_AM_NPORT(am);
1922         u8 section = (am & 0x00ff0000) >> 16;
1923         u8 *p = data;
1924         int size = 0;
1925
1926         if (num_ports != 1) {
1927                 smp->status |= IB_SMP_INVALID_FIELD;
1928                 return reply((struct ib_mad_hdr *)smp);
1929         }
1930
1931         switch (section) {
1932         case OPA_VLARB_LOW_ELEMENTS:
1933                 size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1934                 break;
1935         case OPA_VLARB_HIGH_ELEMENTS:
1936                 size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1937                 break;
1938         case OPA_VLARB_PREEMPT_ELEMENTS:
1939                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1940                 break;
1941         case OPA_VLARB_PREEMPT_MATRIX:
1942                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1943                 break;
1944         default:
1945                 pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1946                         be32_to_cpu(smp->attr_mod));
1947                 smp->status |= IB_SMP_INVALID_FIELD;
1948                 break;
1949         }
1950
1951         if (size > 0 && resp_len)
1952                 *resp_len += size;
1953
1954         return reply((struct ib_mad_hdr *)smp);
1955 }
1956
1957 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1958                                  struct ib_device *ibdev, u8 port,
1959                                  u32 *resp_len)
1960 {
1961         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1962         u32 num_ports = OPA_AM_NPORT(am);
1963         u8 section = (am & 0x00ff0000) >> 16;
1964         u8 *p = data;
1965
1966         if (num_ports != 1) {
1967                 smp->status |= IB_SMP_INVALID_FIELD;
1968                 return reply((struct ib_mad_hdr *)smp);
1969         }
1970
1971         switch (section) {
1972         case OPA_VLARB_LOW_ELEMENTS:
1973                 (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1974                 break;
1975         case OPA_VLARB_HIGH_ELEMENTS:
1976                 (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1977                 break;
1978         /*
1979          * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
1980          * can be changed from their default values.
1981          */
1982         case OPA_VLARB_PREEMPT_ELEMENTS:
1983                 /* FALLTHROUGH */
1984         case OPA_VLARB_PREEMPT_MATRIX:
1985                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1986                 break;
1987         default:
1988                 pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1989                         be32_to_cpu(smp->attr_mod));
1990                 smp->status |= IB_SMP_INVALID_FIELD;
1991                 break;
1992         }
1993
1994         return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1995 }
1996
1997 struct opa_pma_mad {
1998         struct ib_mad_hdr mad_hdr;
1999         u8 data[2024];
2000 } __packed;
2001
2002 struct opa_class_port_info {
2003         u8 base_version;
2004         u8 class_version;
2005         __be16 cap_mask;
2006         __be32 cap_mask2_resp_time;
2007
2008         u8 redirect_gid[16];
2009         __be32 redirect_tc_fl;
2010         __be32 redirect_lid;
2011         __be32 redirect_sl_qp;
2012         __be32 redirect_qkey;
2013
2014         u8 trap_gid[16];
2015         __be32 trap_tc_fl;
2016         __be32 trap_lid;
2017         __be32 trap_hl_qp;
2018         __be32 trap_qkey;
2019
2020         __be16 trap_pkey;
2021         __be16 redirect_pkey;
2022
2023         u8 trap_sl_rsvd;
2024         u8 reserved[3];
2025 } __packed;
2026
2027 struct opa_port_status_req {
2028         __u8 port_num;
2029         __u8 reserved[3];
2030         __be32 vl_select_mask;
2031 };
2032
2033 #define VL_MASK_ALL             0x000080ff
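/* bits 0-7 select data VLs 0-7; bit 15 selects the management VL, VL15 */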
2034
2035 struct opa_port_status_rsp {
2036         __u8 port_num;
2037         __u8 reserved[3];
2038         __be32  vl_select_mask;
2039
2040         /* Data counters */
2041         __be64 port_xmit_data;
2042         __be64 port_rcv_data;
2043         __be64 port_xmit_pkts;
2044         __be64 port_rcv_pkts;
2045         __be64 port_multicast_xmit_pkts;
2046         __be64 port_multicast_rcv_pkts;
2047         __be64 port_xmit_wait;
2048         __be64 sw_port_congestion;
2049         __be64 port_rcv_fecn;
2050         __be64 port_rcv_becn;
2051         __be64 port_xmit_time_cong;
2052         __be64 port_xmit_wasted_bw;
2053         __be64 port_xmit_wait_data;
2054         __be64 port_rcv_bubble;
2055         __be64 port_mark_fecn;
2056         /* Error counters */
2057         __be64 port_rcv_constraint_errors;
2058         __be64 port_rcv_switch_relay_errors;
2059         __be64 port_xmit_discards;
2060         __be64 port_xmit_constraint_errors;
2061         __be64 port_rcv_remote_physical_errors;
2062         __be64 local_link_integrity_errors;
2063         __be64 port_rcv_errors;
2064         __be64 excessive_buffer_overruns;
2065         __be64 fm_config_errors;
2066         __be32 link_error_recovery;
2067         __be32 link_downed;
2068         u8 uncorrectable_errors;
2069
2070         u8 link_quality_indicator; /* 5res, 3bit */
2071         u8 res2[6];
2072         struct _vls_pctrs {
2073                 /* per-VL Data counters */
2074                 __be64 port_vl_xmit_data;
2075                 __be64 port_vl_rcv_data;
2076                 __be64 port_vl_xmit_pkts;
2077                 __be64 port_vl_rcv_pkts;
2078                 __be64 port_vl_xmit_wait;
2079                 __be64 sw_port_vl_congestion;
2080                 __be64 port_vl_rcv_fecn;
2081                 __be64 port_vl_rcv_becn;
2082                 __be64 port_xmit_time_cong;
2083                 __be64 port_vl_xmit_wasted_bw;
2084                 __be64 port_vl_xmit_wait_data;
2085                 __be64 port_vl_rcv_bubble;
2086                 __be64 port_vl_mark_fecn;
2087                 __be64 port_vl_xmit_discards;
2088         } vls[0]; /* real array size defined by # bits set in vl_select_mask */
2089 };
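/*
 * Illustrative sketch (not part of the driver): the size of a
 * response carrying the vls[] flexible array is derived from the
 * population count of vl_select_mask, mirroring the computation in
 * pma_get_opa_portstatus() below:
 */
static inline size_t port_status_rsp_size(u32 vl_select_mask)
{
        return sizeof(struct opa_port_status_rsp) +
               hweight32(vl_select_mask) * sizeof(struct _vls_pctrs);
}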
2090
2091 enum counter_selects {
2092         CS_PORT_XMIT_DATA                       = (1 << 31),
2093         CS_PORT_RCV_DATA                        = (1 << 30),
2094         CS_PORT_XMIT_PKTS                       = (1 << 29),
2095         CS_PORT_RCV_PKTS                        = (1 << 28),
2096         CS_PORT_MCAST_XMIT_PKTS                 = (1 << 27),
2097         CS_PORT_MCAST_RCV_PKTS                  = (1 << 26),
2098         CS_PORT_XMIT_WAIT                       = (1 << 25),
2099         CS_SW_PORT_CONGESTION                   = (1 << 24),
2100         CS_PORT_RCV_FECN                        = (1 << 23),
2101         CS_PORT_RCV_BECN                        = (1 << 22),
2102         CS_PORT_XMIT_TIME_CONG                  = (1 << 21),
2103         CS_PORT_XMIT_WASTED_BW                  = (1 << 20),
2104         CS_PORT_XMIT_WAIT_DATA                  = (1 << 19),
2105         CS_PORT_RCV_BUBBLE                      = (1 << 18),
2106         CS_PORT_MARK_FECN                       = (1 << 17),
2107         CS_PORT_RCV_CONSTRAINT_ERRORS           = (1 << 16),
2108         CS_PORT_RCV_SWITCH_RELAY_ERRORS         = (1 << 15),
2109         CS_PORT_XMIT_DISCARDS                   = (1 << 14),
2110         CS_PORT_XMIT_CONSTRAINT_ERRORS          = (1 << 13),
2111         CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS      = (1 << 12),
2112         CS_LOCAL_LINK_INTEGRITY_ERRORS          = (1 << 11),
2113         CS_PORT_RCV_ERRORS                      = (1 << 10),
2114         CS_EXCESSIVE_BUFFER_OVERRUNS            = (1 << 9),
2115         CS_FM_CONFIG_ERRORS                     = (1 << 8),
2116         CS_LINK_ERROR_RECOVERY                  = (1 << 7),
2117         CS_LINK_DOWNED                          = (1 << 6),
2118         CS_UNCORRECTABLE_ERRORS                 = (1 << 5),
2119 };
2120
2121 struct opa_clear_port_status {
2122         __be64 port_select_mask[4];
2123         __be32 counter_select_mask;
2124 };
2125
2126 struct opa_aggregate {
2127         __be16 attr_id;
2128         __be16 err_reqlength;   /* 1 bit, 8 res, 7 bit */
2129         __be32 attr_mod;
2130         u8 data[0];
2131 };
2132
2133 #define MSK_LLI 0x000000f0
2134 #define MSK_LLI_SFT 4
2135 #define MSK_LER 0x0000000f
2136 #define MSK_LER_SFT 0
2137 #define ADD_LLI 8
2138 #define ADD_LER 2
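/*
 * Worked example: a resolution field of 0x35 decodes to an LLI nibble
 * of 3 (right shift of 3 + ADD_LLI = 11 bits) and an LER nibble of 5
 * (right shift of 5 + ADD_LER = 7 bits); a nibble of 0 means no shift.
 * See pma_get_opa_datacounters() below.
 */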
2139
2140 /* Request contains first three fields, response contains those plus the rest */
2141 struct opa_port_data_counters_msg {
2142         __be64 port_select_mask[4];
2143         __be32 vl_select_mask;
2144         __be32 resolution;
2145
2146         /* Response fields follow */
2147         struct _port_dctrs {
2148                 u8 port_number;
2149                 u8 reserved2[3];
2150                 __be32 link_quality_indicator; /* 29res, 3bit */
2151
2152                 /* Data counters */
2153                 __be64 port_xmit_data;
2154                 __be64 port_rcv_data;
2155                 __be64 port_xmit_pkts;
2156                 __be64 port_rcv_pkts;
2157                 __be64 port_multicast_xmit_pkts;
2158                 __be64 port_multicast_rcv_pkts;
2159                 __be64 port_xmit_wait;
2160                 __be64 sw_port_congestion;
2161                 __be64 port_rcv_fecn;
2162                 __be64 port_rcv_becn;
2163                 __be64 port_xmit_time_cong;
2164                 __be64 port_xmit_wasted_bw;
2165                 __be64 port_xmit_wait_data;
2166                 __be64 port_rcv_bubble;
2167                 __be64 port_mark_fecn;
2168
2169                 __be64 port_error_counter_summary;
2170                 /* Sum of error counts/port */
2171
2172                 struct _vls_dctrs {
2173                         /* per-VL Data counters */
2174                         __be64 port_vl_xmit_data;
2175                         __be64 port_vl_rcv_data;
2176                         __be64 port_vl_xmit_pkts;
2177                         __be64 port_vl_rcv_pkts;
2178                         __be64 port_vl_xmit_wait;
2179                         __be64 sw_port_vl_congestion;
2180                         __be64 port_vl_rcv_fecn;
2181                         __be64 port_vl_rcv_becn;
2182                         __be64 port_xmit_time_cong;
2183                         __be64 port_vl_xmit_wasted_bw;
2184                         __be64 port_vl_xmit_wait_data;
2185                         __be64 port_vl_rcv_bubble;
2186                         __be64 port_vl_mark_fecn;
2187                 } vls[0];
2188                 /* array size defined by #bits set in vl_select_mask */
2189         } port[1]; /* array size defined by #ports in attribute modifier */
2190 };
2191
2192 struct opa_port_error_counters64_msg {
2193         /*
2194          * Request contains the first two fields, response contains the
2195          * whole structure.
2196          */
2197         __be64 port_select_mask[4];
2198         __be32 vl_select_mask;
2199
2200         /* Response-only fields follow */
2201         __be32 reserved1;
2202         struct _port_ectrs {
2203                 u8 port_number;
2204                 u8 reserved2[7];
2205                 __be64 port_rcv_constraint_errors;
2206                 __be64 port_rcv_switch_relay_errors;
2207                 __be64 port_xmit_discards;
2208                 __be64 port_xmit_constraint_errors;
2209                 __be64 port_rcv_remote_physical_errors;
2210                 __be64 local_link_integrity_errors;
2211                 __be64 port_rcv_errors;
2212                 __be64 excessive_buffer_overruns;
2213                 __be64 fm_config_errors;
2214                 __be32 link_error_recovery;
2215                 __be32 link_downed;
2216                 u8 uncorrectable_errors;
2217                 u8 reserved3[7];
2218                 struct _vls_ectrs {
2219                         __be64 port_vl_xmit_discards;
2220                 } vls[0];
2221                 /* array size defined by #bits set in vl_select_mask */
2222         } port[1]; /* array size defined by #ports in attribute modifier */
2223 };
2224
2225 struct opa_port_error_info_msg {
2226         __be64 port_select_mask[4];
2227         __be32 error_info_select_mask;
2228         __be32 reserved1;
2229         struct _port_ei {
2230                 u8 port_number;
2231                 u8 reserved2[7];
2232
2233                 /* PortRcvErrorInfo */
2234                 struct {
2235                         u8 status_and_code;
2236                         union {
2237                                 u8 raw[17];
2238                                 struct {
2239                                         /* EI1to12 format */
2240                                         u8 packet_flit1[8];
2241                                         u8 packet_flit2[8];
2242                                         u8 remaining_flit_bits12;
2243                                 } ei1to12;
2244                                 struct {
2245                                         u8 packet_bytes[8];
2246                                         u8 remaining_flit_bits;
2247                                 } ei13;
2248                         } ei;
2249                         u8 reserved3[6];
2250                 } __packed port_rcv_ei;
2251
2252                 /* ExcessiveBufferOverrunInfo */
2253                 struct {
2254                         u8 status_and_sc;
2255                         u8 reserved4[7];
2256                 } __packed excessive_buffer_overrun_ei;
2257
2258                 /* PortXmitConstraintErrorInfo */
2259                 struct {
2260                         u8 status;
2261                         u8 reserved5;
2262                         __be16 pkey;
2263                         __be32 slid;
2264                 } __packed port_xmit_constraint_ei;
2265
2266                 /* PortRcvConstraintErrorInfo */
2267                 struct {
2268                         u8 status;
2269                         u8 reserved6;
2270                         __be16 pkey;
2271                         __be32 slid;
2272                 } __packed port_rcv_constraint_ei;
2273
2274                 /* PortRcvSwitchRelayErrorInfo */
2275                 struct {
2276                         u8 status_and_code;
2277                         u8 reserved7[3];
2278                         __u32 error_info;
2279                 } __packed port_rcv_switch_relay_ei;
2280
2281                 /* UncorrectableErrorInfo */
2282                 struct {
2283                         u8 status_and_code;
2284                         u8 reserved8;
2285                 } __packed uncorrectable_ei;
2286
2287                 /* FMConfigErrorInfo */
2288                 struct {
2289                         u8 status_and_code;
2290                         u8 error_info;
2291                 } __packed fm_config_ei;
2292                 __u32 reserved9;
2293         } port[1]; /* actual array size defined by #ports in attr modifier */
2294 };
2295
2296 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2297 enum error_info_selects {
2298         ES_PORT_RCV_ERROR_INFO                  = (1 << 31),
2299         ES_EXCESSIVE_BUFFER_OVERRUN_INFO        = (1 << 30),
2300         ES_PORT_XMIT_CONSTRAINT_ERROR_INFO      = (1 << 29),
2301         ES_PORT_RCV_CONSTRAINT_ERROR_INFO       = (1 << 28),
2302         ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO     = (1 << 27),
2303         ES_UNCORRECTABLE_ERROR_INFO             = (1 << 26),
2304         ES_FM_CONFIG_ERROR_INFO                 = (1 << 25)
2305 };
2306
2307 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2308                                      struct ib_device *ibdev, u32 *resp_len)
2309 {
2310         struct opa_class_port_info *p =
2311                 (struct opa_class_port_info *)pmp->data;
2312
2313         memset(pmp->data, 0, sizeof(pmp->data));
2314
2315         if (pmp->mad_hdr.attr_mod != 0)
2316                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2317
2318         p->base_version = OPA_MGMT_BASE_VERSION;
2319         p->class_version = OPA_SMI_CLASS_VERSION;
2320         /*
2321          * Expected response time is 4.096 usec * 2^18 == 1.073741824 sec.
2322          */
2323         p->cap_mask2_resp_time = cpu_to_be32(18);
2324
2325         if (resp_len)
2326                 *resp_len += sizeof(*p);
2327
2328         return reply((struct ib_mad_hdr *)pmp);
2329 }
2330
2331 static void a0_portstatus(struct hfi1_pportdata *ppd,
2332                           struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2333 {
2334         if (!is_bx(ppd->dd)) {
2335                 unsigned long vl;
2336                 u64 sum_vl_xmit_wait = 0;
2337                 u32 vl_all_mask = VL_MASK_ALL;
2338
2339                 for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2340                                  8 * sizeof(vl_all_mask)) {
2341                         u64 tmp = sum_vl_xmit_wait +
2342                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2343                                                  idx_from_vl(vl));
2344                         if (tmp < sum_vl_xmit_wait) {
2345                                 /* we wrapped */
2346                                 sum_vl_xmit_wait = (u64)~0;
2347                                 break;
2348                         }
2349                         sum_vl_xmit_wait = tmp;
2350                 }
2351                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2352                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2353         }
2354 }
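/*
 * Illustrative sketch (not part of the driver): the wrap check in the
 * loop above is a saturating 64-bit accumulate, equivalent to:
 */
static inline u64 sat_add64(u64 a, u64 b)
{
        u64 sum = a + b;

        /* an unsigned sum that wrapped is smaller than either operand */
        return sum < a ? (u64)~0 : sum;
}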
2355
2356 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2357                                   struct ib_device *ibdev,
2358                                   u8 port, u32 *resp_len)
2359 {
2360         struct opa_port_status_req *req =
2361                 (struct opa_port_status_req *)pmp->data;
2362         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2363         struct opa_port_status_rsp *rsp;
2364         u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2365         unsigned long vl;
2366         size_t response_data_size;
2367         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2368         u8 port_num = req->port_num;
2369         u8 num_vls = hweight32(vl_select_mask);
2370         struct _vls_pctrs *vlinfo;
2371         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2372         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2373         int vfi;
2374         u64 tmp, tmp2;
2375
2376         response_data_size = sizeof(struct opa_port_status_rsp) +
2377                                 num_vls * sizeof(struct _vls_pctrs);
2378         if (response_data_size > sizeof(pmp->data)) {
2379                 pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2380                 return reply((struct ib_mad_hdr *)pmp);
2381         }
2382
2383         if (nports != 1 || (port_num && port_num != port) ||
2384             num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2385                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2386                 return reply((struct ib_mad_hdr *)pmp);
2387         }
2388
2389         memset(pmp->data, 0, sizeof(pmp->data));
2390
2391         rsp = (struct opa_port_status_rsp *)pmp->data;
2392         if (port_num)
2393                 rsp->port_num = port_num;
2394         else
2395                 rsp->port_num = port;
2396
2397         rsp->port_rcv_constraint_errors =
2398                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2399                                            CNTR_INVALID_VL));
2400
2401         hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2402
2403         rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2404         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2405                                           CNTR_INVALID_VL));
2406         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2407                                          CNTR_INVALID_VL));
2408         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2409                                           CNTR_INVALID_VL));
2410         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2411                                          CNTR_INVALID_VL));
2412         rsp->port_multicast_xmit_pkts =
2413                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2414                                           CNTR_INVALID_VL));
2415         rsp->port_multicast_rcv_pkts =
2416                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2417                                           CNTR_INVALID_VL));
2418         rsp->port_xmit_wait =
2419                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2420         rsp->port_rcv_fecn =
2421                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2422         rsp->port_rcv_becn =
2423                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2424         rsp->port_xmit_discards =
2425                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2426                                            CNTR_INVALID_VL));
2427         rsp->port_xmit_constraint_errors =
2428                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2429                                            CNTR_INVALID_VL));
2430         rsp->port_rcv_remote_physical_errors =
2431                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2432                                           CNTR_INVALID_VL));
2433         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2434         tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2435         if (tmp2 < tmp) {
2436                 /* overflow/wrapped */
2437                 rsp->local_link_integrity_errors = cpu_to_be64(~0);
2438         } else {
2439                 rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2440         }
2441         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2442         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2443                                    CNTR_INVALID_VL);
2444         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2445                 /* overflow/wrapped */
2446                 rsp->link_error_recovery = cpu_to_be32(~0);
2447         } else {
2448                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2449         }
2450         rsp->port_rcv_errors =
2451                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2452         rsp->excessive_buffer_overruns =
2453                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2454         rsp->fm_config_errors =
2455                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2456                                           CNTR_INVALID_VL));
2457         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2458                                                       CNTR_INVALID_VL));
2459
2460         /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2461         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2462         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2463
2464         vlinfo = &rsp->vls[0];
2465         vfi = 0;
2466         /* The vl_select_mask has been checked above, and we know
2467          * that it contains only entries which represent valid VLs.
2468          * So in the for_each_set_bit() loop below, we don't need
2469          * any additional checks for vl.
2470          */
2471         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2472                          8 * sizeof(vl_select_mask)) {
2473                 memset(vlinfo, 0, sizeof(*vlinfo));
2474
2475                 tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2476                 rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2477
2478                 rsp->vls[vfi].port_vl_rcv_pkts =
2479                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2480                                                   idx_from_vl(vl)));
2481
2482                 rsp->vls[vfi].port_vl_xmit_data =
2483                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2484                                                    idx_from_vl(vl)));
2485
2486                 rsp->vls[vfi].port_vl_xmit_pkts =
2487                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2488                                                    idx_from_vl(vl)));
2489
2490                 rsp->vls[vfi].port_vl_xmit_wait =
2491                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2492                                                    idx_from_vl(vl)));
2493
2494                 rsp->vls[vfi].port_vl_rcv_fecn =
2495                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2496                                                   idx_from_vl(vl)));
2497
2498                 rsp->vls[vfi].port_vl_rcv_becn =
2499                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2500                                                   idx_from_vl(vl)));
2501
2502                 vlinfo++;
2503                 vfi++;
2504         }
2505
2506         a0_portstatus(ppd, rsp, vl_select_mask);
2507
2508         if (resp_len)
2509                 *resp_len += response_data_size;
2510
2511         return reply((struct ib_mad_hdr *)pmp);
2512 }
2513
2514 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2515                                      u8 res_lli, u8 res_ler)
2516 {
2517         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2518         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2519         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2520         u64 error_counter_summary = 0, tmp;
2521
2522         error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2523                                                 CNTR_INVALID_VL);
2524         /* port_rcv_switch_relay_errors is 0 for HFIs */
2525         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2526                                                 CNTR_INVALID_VL);
2527         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2528                                                 CNTR_INVALID_VL);
2529         error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2530                                                CNTR_INVALID_VL);
2531         /* local link integrity must be right-shifted by the lli resolution */
2532         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2533         tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2534         error_counter_summary += (tmp >> res_lli);
2535         /* link error recovery must be right-shifted by the ler resolution */
2536         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2537         tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2538         error_counter_summary += (tmp >> res_ler);
2539         error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2540                                                CNTR_INVALID_VL);
2541         error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2542         error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2543                                                CNTR_INVALID_VL);
2544         /* ppd->link_downed is a 32-bit value */
2545         error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2546                                                 CNTR_INVALID_VL);
2547         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2548         /* this is an 8-bit quantity */
2549         error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2550
2551         return error_counter_summary;
2552 }
2553
2554 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2555                             u32 vl_select_mask)
2556 {
2557         if (!is_bx(ppd->dd)) {
2558                 unsigned long vl;
2559                 u64 sum_vl_xmit_wait = 0;
2560                 u32 vl_all_mask = VL_MASK_ALL;
2561
2562                 for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2563                                  8 * sizeof(vl_all_mask)) {
2564                         u64 tmp = sum_vl_xmit_wait +
2565                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2566                                                  idx_from_vl(vl));
2567                         if (tmp < sum_vl_xmit_wait) {
2568                                 /* we wrapped */
2569                                 sum_vl_xmit_wait = (u64)~0;
2570                                 break;
2571                         }
2572                         sum_vl_xmit_wait = tmp;
2573                 }
2574                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2575                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2576         }
2577 }
2578
2579 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2580                                    struct _port_dctrs *rsp)
2581 {
2582         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2583
2584         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2585                                                 CNTR_INVALID_VL));
2586         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2587                                                 CNTR_INVALID_VL));
2588         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2589                                                 CNTR_INVALID_VL));
2590         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2591                                                 CNTR_INVALID_VL));
2592         rsp->port_multicast_xmit_pkts =
2593                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2594                                           CNTR_INVALID_VL));
2595         rsp->port_multicast_rcv_pkts =
2596                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2597                                           CNTR_INVALID_VL));
2598 }
2599
2600 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2601                                     struct ib_device *ibdev,
2602                                     u8 port, u32 *resp_len)
2603 {
2604         struct opa_port_data_counters_msg *req =
2605                 (struct opa_port_data_counters_msg *)pmp->data;
2606         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2607         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2608         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2609         struct _port_dctrs *rsp;
2610         struct _vls_dctrs *vlinfo;
2611         size_t response_data_size;
2612         u32 num_ports;
2613         u8 num_pslm;
2614         u8 lq, num_vls;
2615         u8 res_lli, res_ler;
2616         u64 port_mask;
2617         unsigned long port_num;
2618         unsigned long vl;
2619         u32 vl_select_mask;
2620         int vfi;
2621
2622         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2623         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2624         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2625         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2626         res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2627         res_lli = res_lli ? res_lli + ADD_LLI : 0;
2628         res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2629         res_ler = res_ler ? res_ler + ADD_LER : 0;
2630
2631         if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2632                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2633                 return reply((struct ib_mad_hdr *)pmp);
2634         }
2635
2636         /* Sanity check */
2637         response_data_size = sizeof(struct opa_port_data_counters_msg) +
2638                                 num_vls * sizeof(struct _vls_dctrs);
2639
2640         if (response_data_size > sizeof(pmp->data)) {
2641                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2642                 return reply((struct ib_mad_hdr *)pmp);
2643         }
2644
2645         /*
2646          * The bit set in the mask needs to be consistent with the
2647          * port the request came in on.
2648          */
2649         port_mask = be64_to_cpu(req->port_select_mask[3]);
2650         port_num = find_first_bit((unsigned long *)&port_mask,
2651                                   sizeof(port_mask) * 8);
2652
2653         if ((u8)port_num != port) {
2654                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2655                 return reply((struct ib_mad_hdr *)pmp);
2656         }
2657
2658         rsp = &req->port[0];
2659         memset(rsp, 0, sizeof(*rsp));
2660
2661         rsp->port_number = port;
2662         /*
2663          * Note that link_quality_indicator is a 32 bit quantity in
2664          * 'datacounters' queries (as opposed to 'portinfo' queries,
2665          * where it's a byte).
2666          */
2667         hfi1_read_link_quality(dd, &lq);
2668         rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2669         pma_get_opa_port_dctrs(ibdev, rsp);
2670
2671         rsp->port_xmit_wait =
2672                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2673         rsp->port_rcv_fecn =
2674                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2675         rsp->port_rcv_becn =
2676                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2677         rsp->port_error_counter_summary =
2678                 cpu_to_be64(get_error_counter_summary(ibdev, port,
2679                                                       res_lli, res_ler));
2680
2681         vlinfo = &rsp->vls[0];
2682         vfi = 0;
2683         /* The vl_select_mask has been checked above, and we know
2684          * that it contains only entries which represent valid VLs.
2685          * So in the for_each_set_bit() loop below, we don't need
2686          * any additional checks for vl.
2687          */
2688         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2689                          8 * sizeof(req->vl_select_mask)) {
2690                 memset(vlinfo, 0, sizeof(*vlinfo));
2691
2692                 rsp->vls[vfi].port_vl_xmit_data =
2693                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2694                                                    idx_from_vl(vl)));
2695
2696                 rsp->vls[vfi].port_vl_rcv_data =
2697                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2698                                                   idx_from_vl(vl)));
2699
2700                 rsp->vls[vfi].port_vl_xmit_pkts =
2701                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2702                                                    idx_from_vl(vl)));
2703
2704                 rsp->vls[vfi].port_vl_rcv_pkts =
2705                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2706                                                   idx_from_vl(vl)));
2707
2708                 rsp->vls[vfi].port_vl_xmit_wait =
2709                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2710                                                    idx_from_vl(vl)));
2711
2712                 rsp->vls[vfi].port_vl_rcv_fecn =
2713                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2714                                                   idx_from_vl(vl)));
2715                 rsp->vls[vfi].port_vl_rcv_becn =
2716                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2717                                                   idx_from_vl(vl)));
2718
2719                 /* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2720                 /* rsp->port_vl_xmit_wasted_bw ??? */
2721                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2722                  * does this differ from rsp->vls[vfi].port_vl_xmit_wait?
2723                  */
2724                 /*rsp->vls[vfi].port_vl_mark_fecn =
2725                  *      cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2726                  *              + offset));
2727                  */
2728                 vlinfo++;
2729                 vfi++;
2730         }
2731
2732         a0_datacounters(ppd, rsp, vl_select_mask);
2733
2734         if (resp_len)
2735                 *resp_len += response_data_size;
2736
2737         return reply((struct ib_mad_hdr *)pmp);
2738 }
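
/*
 * Editorial sketch (not part of the driver): the per-VL loop above is the
 * pattern these handlers share -- walk the 32-bit vl_select_mask (already
 * validated against VL_MASK_ALL) and map each VL number to a counter index
 * with idx_from_vl().  A minimal, stand-alone illustration of the walk;
 * count_selected_vls() is a hypothetical name used only here:
 */
static inline int count_selected_vls(u32 mask)
{
        unsigned long bits = mask;
        unsigned long vl;
        int n = 0;

        for_each_set_bit(vl, &bits, 8 * sizeof(mask))
                n++;    /* a real handler reads/clears a VL counter here */
        return n;
}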
2739
2740 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2741                                        struct ib_device *ibdev, u8 port)
2742 {
2743         struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2744                                                 pmp->data;
2745         struct _port_dctrs rsp;
2746
2747         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2748                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2749                 goto bail;
2750         }
2751
2752         memset(&rsp, 0, sizeof(rsp));
2753         pma_get_opa_port_dctrs(ibdev, &rsp);
2754
2755         p->port_xmit_data = rsp.port_xmit_data;
2756         p->port_rcv_data = rsp.port_rcv_data;
2757         p->port_xmit_packets = rsp.port_xmit_pkts;
2758         p->port_rcv_packets = rsp.port_rcv_pkts;
2759         p->port_unicast_xmit_packets = 0;
2760         p->port_unicast_rcv_packets = 0;
2761         p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2762         p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2763
2764 bail:
2765         return reply((struct ib_mad_hdr *)pmp);
2766 }
2767
2768 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2769                                    struct _port_ectrs *rsp, u8 port)
2770 {
2771         u64 tmp, tmp2;
2772         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2773         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2774         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2775
2776         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2777         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2778                                         CNTR_INVALID_VL);
2779         if (tmp2 > UINT_MAX || tmp2 < tmp) {
2780                 /* overflow/wrapped */
2781                 rsp->link_error_recovery = cpu_to_be32(~0);
2782         } else {
2783                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2784         }
2785
2786         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2787                                                 CNTR_INVALID_VL));
2788         rsp->port_rcv_errors =
2789                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2790         rsp->port_rcv_remote_physical_errors =
2791                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2792                                           CNTR_INVALID_VL));
2793         rsp->port_rcv_switch_relay_errors = 0;
2794         rsp->port_xmit_discards =
2795                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2796                                            CNTR_INVALID_VL));
2797         rsp->port_xmit_constraint_errors =
2798                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2799                                            CNTR_INVALID_VL));
2800         rsp->port_rcv_constraint_errors =
2801                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2802                                            CNTR_INVALID_VL));
2803         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2804         tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2805         if (tmp2 < tmp) {
2806                 /* overflow/wrapped */
2807                 rsp->local_link_integrity_errors = cpu_to_be64(~0);
2808         } else {
2809                 rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2810         }
2811         rsp->excessive_buffer_overruns =
2812                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2813 }
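
/*
 * Editorial sketch (not part of the driver): link_error_recovery and
 * local_link_integrity_errors above are each the sum of two free-running
 * counters, clamped rather than wrapped when reported.  The 'tmp2 < tmp'
 * test is the 64-bit wrap check; in isolation (sat_add64 is a hypothetical
 * helper used only for illustration):
 */
static inline u64 sat_add64(u64 a, u64 b)
{
        u64 sum = a + b;

        return sum < a ? ~(u64)0 : sum; /* saturate on 64-bit wrap */
}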
2814
2815 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2816                                   struct ib_device *ibdev,
2817                                   u8 port, u32 *resp_len)
2818 {
2819         size_t response_data_size;
2820         struct _port_ectrs *rsp;
2821         u8 port_num;
2822         struct opa_port_error_counters64_msg *req;
2823         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2824         u32 num_ports;
2825         u8 num_pslm;
2826         u8 num_vls;
2827         struct hfi1_ibport *ibp;
2828         struct hfi1_pportdata *ppd;
2829         struct _vls_ectrs *vlinfo;
2830         unsigned long vl;
2831         u64 port_mask, tmp;
2832         u32 vl_select_mask;
2833         int vfi;
2834
2835         req = (struct opa_port_error_counters64_msg *)pmp->data;
2836
2837         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
2838
2839         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2840         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2841
2842         if (num_ports != 1 || num_ports != num_pslm) {
2843                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2844                 return reply((struct ib_mad_hdr *)pmp);
2845         }
2846
2847         response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2848                                 num_vls * sizeof(struct _vls_ectrs);
2849
2850         if (response_data_size > sizeof(pmp->data)) {
2851                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2852                 return reply((struct ib_mad_hdr *)pmp);
2853         }
2854         /*
2855          * The bit set in the mask needs to be consistent with the
2856          * port the request came in on.
2857          */
2858         port_mask = be64_to_cpu(req->port_select_mask[3]);
2859         port_num = find_first_bit((unsigned long *)&port_mask,
2860                                   sizeof(port_mask) * 8);
2861
2862         if (port_num != port) {
2863                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2864                 return reply((struct ib_mad_hdr *)pmp);
2865         }
2866
2867         rsp = &req->port[0];
2868
2869         ibp = to_iport(ibdev, port_num);
2870         ppd = ppd_from_ibp(ibp);
2871
2872         memset(rsp, 0, sizeof(*rsp));
2873         rsp->port_number = port_num;
2874
2875         pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2876
2877         rsp->port_rcv_remote_physical_errors =
2878                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2879                                           CNTR_INVALID_VL));
2880         rsp->fm_config_errors =
2881                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2882                                           CNTR_INVALID_VL));
2883         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2884
2885         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2886
2887         vlinfo = &rsp->vls[0];
2888         vfi = 0;
2889         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2890         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2891                          8 * sizeof(req->vl_select_mask)) {
2892                 memset(vlinfo, 0, sizeof(*vlinfo));
2893                 /* vlinfo->port_vl_xmit_discards ??? */
2894                 vlinfo++;
2895                 vfi++;
2896         }
2897
2898         if (resp_len)
2899                 *resp_len += response_data_size;
2900
2901         return reply((struct ib_mad_hdr *)pmp);
2902 }
2903
2904 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2905                                    struct ib_device *ibdev, u8 port)
2906 {
2907         struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2908                 pmp->data;
2909         struct _port_ectrs rsp;
2910         u64 temp_link_overrun_errors;
2911         u64 temp_64;
2912         u32 temp_32;
2913
2914         memset(&rsp, 0, sizeof(rsp));
2915         pma_get_opa_port_ectrs(ibdev, &rsp, port);
2916
2917         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2918                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2919                 goto bail;
2920         }
2921
2922         p->symbol_error_counter = 0; /* N/A for OPA */
2923
2924         temp_32 = be32_to_cpu(rsp.link_error_recovery);
2925         if (temp_32 > 0xFFUL)
2926                 p->link_error_recovery_counter = 0xFF;
2927         else
2928                 p->link_error_recovery_counter = (u8)temp_32;
2929
2930         temp_32 = be32_to_cpu(rsp.link_downed);
2931         if (temp_32 > 0xFFUL)
2932                 p->link_downed_counter = 0xFF;
2933         else
2934                 p->link_downed_counter = (u8)temp_32;
2935
2936         temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2937         if (temp_64 > 0xFFFFUL)
2938                 p->port_rcv_errors = cpu_to_be16(0xFFFF);
2939         else
2940                 p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2941
2942         temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2943         if (temp_64 > 0xFFFFUL)
2944                 p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2945         else
2946                 p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2947
2948         temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2949         p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2950
2951         temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2952         if (temp_64 > 0xFFFFUL)
2953                 p->port_xmit_discards = cpu_to_be16(0xFFFF);
2954         else
2955                 p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2956
2957         temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2958         if (temp_64 > 0xFFUL)
2959                 p->port_xmit_constraint_errors = 0xFF;
2960         else
2961                 p->port_xmit_constraint_errors = (u8)temp_64;
2962
2963         temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2964         if (temp_64 > 0xFFUL)
2965                 p->port_rcv_constraint_errors = 0xFF;
2966         else
2967                 p->port_rcv_constraint_errors = (u8)temp_64;
2968
2969         /* LocalLink: 7:4, BufferOverrun: 3:0 */
2970         temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2971         if (temp_64 > 0xFUL)
2972                 temp_64 = 0xFUL;
2973
2974         temp_link_overrun_errors = temp_64 << 4;
2975
2976         temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2977         if (temp_64 > 0xFUL)
2978                 temp_64 = 0xFUL;
2979         temp_link_overrun_errors |= temp_64;
2980
2981         p->link_overrun_errors = (u8)temp_link_overrun_errors;
2982
2983         p->vl15_dropped = 0; /* N/A for OPA */
2984
2985 bail:
2986         return reply((struct ib_mad_hdr *)pmp);
2987 }
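
/*
 * Editorial sketch (not part of the driver): pma_get_ib_portcounters()
 * above repeatedly clamps wide OPA counters into the narrower IB PMA
 * fields.  The same pattern, factored out purely for illustration under
 * hypothetical names:
 */
static inline u8 clamp_counter_u8(u64 v)
{
        return v > 0xFFULL ? 0xFF : (u8)v;
}

static inline __be16 clamp_counter_be16(u64 v)
{
        return cpu_to_be16(v > 0xFFFFULL ? 0xFFFF : (u16)v);
}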
2988
2989 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2990                                  struct ib_device *ibdev,
2991                                  u8 port, u32 *resp_len)
2992 {
2993         size_t response_data_size;
2994         struct _port_ei *rsp;
2995         struct opa_port_error_info_msg *req;
2996         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2997         u64 port_mask;
2998         u32 num_ports;
2999         u8 port_num;
3000         u8 num_pslm;
3001         u64 reg;
3002
3003         req = (struct opa_port_error_info_msg *)pmp->data;
3004         rsp = &req->port[0];
3005
3006         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3007         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3008
3009         memset(rsp, 0, sizeof(*rsp));
3010
3011         if (num_ports != 1 || num_ports != num_pslm) {
3012                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3013                 return reply((struct ib_mad_hdr *)pmp);
3014         }
3015
3016         /* Sanity check */
3017         response_data_size = sizeof(struct opa_port_error_info_msg);
3018
3019         if (response_data_size > sizeof(pmp->data)) {
3020                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3021                 return reply((struct ib_mad_hdr *)pmp);
3022         }
3023
3024         /*
3025          * The bit set in the mask needs to be consistent with the port
3026          * the request came in on.
3027          */
3028         port_mask = be64_to_cpu(req->port_select_mask[3]);
3029         port_num = find_first_bit((unsigned long *)&port_mask,
3030                                   sizeof(port_mask) * 8);
3031
3032         if (port_num != port) {
3033                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3034                 return reply((struct ib_mad_hdr *)pmp);
3035         }
3036
3037         /* PortRcvErrorInfo */
3038         rsp->port_rcv_ei.status_and_code =
3039                 dd->err_info_rcvport.status_and_code;
3040         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3041                &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3042         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3043                &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3044
3045         /* ExcessiveBufferOverrunInfo */
3046         reg = read_csr(dd, RCV_ERR_INFO);
3047         if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3048                 /*
3049                  * if the RcvExcessBufferOverrun bit is set, save SC of
3050                  * first pkt that encountered an excess buffer overrun
3051                  */
3052                 u8 tmp = (u8)reg;
3053
3054                 tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3055                 tmp <<= 2;
3056                 rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3057                 /* set the status bit */
3058                 rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3059         }
3060
3061         rsp->port_xmit_constraint_ei.status =
3062                 dd->err_info_xmit_constraint.status;
3063         rsp->port_xmit_constraint_ei.pkey =
3064                 cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3065         rsp->port_xmit_constraint_ei.slid =
3066                 cpu_to_be32(dd->err_info_xmit_constraint.slid);
3067
3068         rsp->port_rcv_constraint_ei.status =
3069                 dd->err_info_rcv_constraint.status;
3070         rsp->port_rcv_constraint_ei.pkey =
3071                 cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3072         rsp->port_rcv_constraint_ei.slid =
3073                 cpu_to_be32(dd->err_info_rcv_constraint.slid);
3074
3075         /* UncorrectableErrorInfo */
3076         rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3077
3078         /* FMConfigErrorInfo */
3079         rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3080
3081         if (resp_len)
3082                 *resp_len += response_data_size;
3083
3084         return reply((struct ib_mad_hdr *)pmp);
3085 }
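
/*
 * Editorial sketch (not part of the driver): the ExcessiveBufferOverrun
 * byte built above packs a status flag and a service class into one
 * octet -- bit 7 is the status bit and the masked SC sits above the two
 * low bits.  The packing step in isolation (make_status_and_sc is a
 * hypothetical helper):
 */
static inline u8 make_status_and_sc(u8 sc)
{
        sc &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
        return 0x80 | (sc << 2);        /* status bit | positioned SC */
}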
3086
3087 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3088                                   struct ib_device *ibdev,
3089                                   u8 port, u32 *resp_len)
3090 {
3091         struct opa_clear_port_status *req =
3092                 (struct opa_clear_port_status *)pmp->data;
3093         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3094         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3095         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3096         u32 nports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3097         u64 portn = be64_to_cpu(req->port_select_mask[3]);
3098         u32 counter_select = be32_to_cpu(req->counter_select_mask);
3099         u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3100         unsigned long vl;
3101
3102         if ((nports != 1) || (portn != 1 << port)) {
3103                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3104                 return reply((struct ib_mad_hdr *)pmp);
3105         }
3106         /*
3107          * only counters returned by pma_get_opa_portstatus() are
3108          * handled, so when pma_get_opa_portstatus() gets a fix,
3109          * the corresponding change should be made here as well.
3110          */
3111
3112         if (counter_select & CS_PORT_XMIT_DATA)
3113                 write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3114
3115         if (counter_select & CS_PORT_RCV_DATA)
3116                 write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3117
3118         if (counter_select & CS_PORT_XMIT_PKTS)
3119                 write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3120
3121         if (counter_select & CS_PORT_RCV_PKTS)
3122                 write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3123
3124         if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3125                 write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3126
3127         if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3128                 write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3129
3130         if (counter_select & CS_PORT_XMIT_WAIT)
3131                 write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3132
3133         /* ignore cs_sw_portCongestion for HFIs */
3134
3135         if (counter_select & CS_PORT_RCV_FECN)
3136                 write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3137
3138         if (counter_select & CS_PORT_RCV_BECN)
3139                 write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3140
3141         /* ignore cs_port_xmit_time_cong for HFIs */
3142         /* ignore cs_port_xmit_wasted_bw for now */
3143         /* ignore cs_port_xmit_wait_data for now */
3144         if (counter_select & CS_PORT_RCV_BUBBLE)
3145                 write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3146
3147         /* Only applicable for switch */
3148         /* if (counter_select & CS_PORT_MARK_FECN)
3149          *      write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3150          */
3151
3152         if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3153                 write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3154
3155         /* ignore cs_port_rcv_switch_relay_errors for HFIs */
3156         if (counter_select & CS_PORT_XMIT_DISCARDS)
3157                 write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3158
3159         if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3160                 write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3161
3162         if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3163                 write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3164
3165         if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
3166                 write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3167                 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3168         }
3169
3170         if (counter_select & CS_LINK_ERROR_RECOVERY) {
3171                 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3172                 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3173                                CNTR_INVALID_VL, 0);
3174         }
3175
3176         if (counter_select & CS_PORT_RCV_ERRORS)
3177                 write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3178
3179         if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3180                 write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3181                 dd->rcv_ovfl_cnt = 0;
3182         }
3183
3184         if (counter_select & CS_FM_CONFIG_ERRORS)
3185                 write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3186
3187         if (counter_select & CS_LINK_DOWNED)
3188                 write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3189
3190         if (counter_select & CS_UNCORRECTABLE_ERRORS)
3191                 write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3192
3193         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3194                          8 * sizeof(vl_select_mask)) {
3195                 if (counter_select & CS_PORT_XMIT_DATA)
3196                         write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3197
3198                 if (counter_select & CS_PORT_RCV_DATA)
3199                         write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3200
3201                 if (counter_select & CS_PORT_XMIT_PKTS)
3202                         write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3203
3204                 if (counter_select & CS_PORT_RCV_PKTS)
3205                         write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3206
3207                 if (counter_select & CS_PORT_XMIT_WAIT)
3208                         write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3209
3210                 /* sw_port_vl_congestion is 0 for HFIs */
3211                 if (counter_select & CS_PORT_RCV_FECN)
3212                         write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3213
3214                 if (counter_select & CS_PORT_RCV_BECN)
3215                         write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3216
3217                 /* port_vl_xmit_time_cong is 0 for HFIs */
3218                 /* port_vl_xmit_wasted_bw ??? */
3219                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3220                 if (counter_select & CS_PORT_RCV_BUBBLE)
3221                         write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3222
3223                 /* if (counter_select & CS_PORT_MARK_FECN)
3224                  *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3225                  */
3226                 /* port_vl_xmit_discards ??? */
3227         }
3228
3229         if (resp_len)
3230                 *resp_len += sizeof(*req);
3231
3232         return reply((struct ib_mad_hdr *)pmp);
3233 }
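
/*
 * Editorial sketch (not part of the driver): a requester selects which
 * counters pma_set_opa_portstatus() clears by OR-ing CS_* bits into
 * counter_select_mask.  For example, to clear only the four data/packet
 * counters (cs_clear_data_counters is a hypothetical helper):
 */
static inline __be32 cs_clear_data_counters(void)
{
        return cpu_to_be32(CS_PORT_XMIT_DATA | CS_PORT_RCV_DATA |
                           CS_PORT_XMIT_PKTS | CS_PORT_RCV_PKTS);
}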
3234
3235 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3236                                  struct ib_device *ibdev,
3237                                  u8 port, u32 *resp_len)
3238 {
3239         struct _port_ei *rsp;
3240         struct opa_port_error_info_msg *req;
3241         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3242         u64 port_mask;
3243         u32 num_ports;
3244         u8 port_num;
3245         u8 num_pslm;
3246         u32 error_info_select;
3247
3248         req = (struct opa_port_error_info_msg *)pmp->data;
3249         rsp = &req->port[0];
3250
3251         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3252         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3253
3254         memset(rsp, 0, sizeof(*rsp));
3255
3256         if (num_ports != 1 || num_ports != num_pslm) {
3257                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3258                 return reply((struct ib_mad_hdr *)pmp);
3259         }
3260
3261         /*
3262          * The bit set in the mask needs to be consistent with the port
3263          * the request came in on.
3264          */
3265         port_mask = be64_to_cpu(req->port_select_mask[3]);
3266         port_num = find_first_bit((unsigned long *)&port_mask,
3267                                   sizeof(port_mask) * 8);
3268
3269         if (port_num != port) {
3270                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3271                 return reply((struct ib_mad_hdr *)pmp);
3272         }
3273
3274         error_info_select = be32_to_cpu(req->error_info_select_mask);
3275
3276         /* PortRcvErrorInfo */
3277         if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3278                 /* turn off status bit */
3279                 dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3280
3281         /* ExcessiveBufferOverrunInfo */
3282         if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3283                 /*
3284                  * status bit is essentially kept in the h/w - bit 5 of
3285                  * RCV_ERR_INFO
3286                  */
3287                 write_csr(dd, RCV_ERR_INFO,
3288                           RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3289
3290         if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3291                 dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3292
3293         if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3294                 dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3295
3296         /* UncorrectableErrorInfo */
3297         if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3298                 /* turn off status bit */
3299                 dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3300
3301         /* FMConfigErrorInfo */
3302         if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3303                 /* turn off status bit */
3304                 dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3305
3306         if (resp_len)
3307                 *resp_len += sizeof(*req);
3308
3309         return reply((struct ib_mad_hdr *)pmp);
3310 }
3311
3312 struct opa_congestion_info_attr {
3313         __be16 congestion_info;
3314         u8 control_table_cap;   /* CCT size supported, in units of 64 entries */
3315         u8 congestion_log_length;
3316 } __packed;
3317
3318 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3319                                     struct ib_device *ibdev, u8 port,
3320                                     u32 *resp_len)
3321 {
3322         struct opa_congestion_info_attr *p =
3323                 (struct opa_congestion_info_attr *)data;
3324         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3325         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3326
3327         p->congestion_info = 0;
3328         p->control_table_cap = ppd->cc_max_table_entries;
3329         p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3330
3331         if (resp_len)
3332                 *resp_len += sizeof(*p);
3333
3334         return reply((struct ib_mad_hdr *)smp);
3335 }
3336
3337 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3338                                        u8 *data, struct ib_device *ibdev,
3339                                        u8 port, u32 *resp_len)
3340 {
3341         int i;
3342         struct opa_congestion_setting_attr *p =
3343                 (struct opa_congestion_setting_attr *)data;
3344         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3345         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3346         struct opa_congestion_setting_entry_shadow *entries;
3347         struct cc_state *cc_state;
3348
3349         rcu_read_lock();
3350
3351         cc_state = get_cc_state(ppd);
3352
3353         if (!cc_state) {
3354                 rcu_read_unlock();
3355                 return reply((struct ib_mad_hdr *)smp);
3356         }
3357
3358         entries = cc_state->cong_setting.entries;
3359         p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3360         p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3361         for (i = 0; i < OPA_MAX_SLS; i++) {
3362                 p->entries[i].ccti_increase = entries[i].ccti_increase;
3363                 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3364                 p->entries[i].trigger_threshold =
3365                         entries[i].trigger_threshold;
3366                 p->entries[i].ccti_min = entries[i].ccti_min;
3367         }
3368
3369         rcu_read_unlock();
3370
3371         if (resp_len)
3372                 *resp_len += sizeof(*p);
3373
3374         return reply((struct ib_mad_hdr *)smp);
3375 }
3376
3377 /*
3378  * Apply congestion control information stored in the ppd to the
3379  * active structure.
3380  */
3381 static void apply_cc_state(struct hfi1_pportdata *ppd)
3382 {
3383         struct cc_state *old_cc_state, *new_cc_state;
3384
3385         new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3386         if (!new_cc_state)
3387                 return;
3388
3389         /*
3390          * Hold the lock for updating *and* to prevent ppd information
3391          * from changing during the update.
3392          */
3393         spin_lock(&ppd->cc_state_lock);
3394
3395         old_cc_state = get_cc_state(ppd);
3396         if (!old_cc_state) {
3397                 /* never active, or shutting down */
3398                 spin_unlock(&ppd->cc_state_lock);
3399                 kfree(new_cc_state);
3400                 return;
3401         }
3402
3403         *new_cc_state = *old_cc_state;
3404
3405         new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3406         memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3407                ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3408
3409         new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3410         new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3411         memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3412                OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3413
3414         rcu_assign_pointer(ppd->cc_state, new_cc_state);
3415
3416         spin_unlock(&ppd->cc_state_lock);
3417
3418         call_rcu(&old_cc_state->rcu, cc_state_reclaim);
3419 }
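
/*
 * Editorial sketch (not part of the driver): apply_cc_state() above is a
 * classic RCU read-copy-update -- build a new cc_state under the lock,
 * publish it with rcu_assign_pointer(), then reclaim the old copy after a
 * grace period.  A reader pairs with it the way the Get handlers in this
 * file do (read_ccti_limit is a hypothetical helper):
 */
static inline u16 read_ccti_limit(struct hfi1_pportdata *ppd)
{
        struct cc_state *cc;
        u16 limit = 0;

        rcu_read_lock();
        cc = get_cc_state(ppd);
        if (cc)
                limit = cc->cct.ccti_limit;     /* stable under RCU */
        rcu_read_unlock();
        return limit;
}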
3420
3421 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3422                                        struct ib_device *ibdev, u8 port,
3423                                        u32 *resp_len)
3424 {
3425         struct opa_congestion_setting_attr *p =
3426                 (struct opa_congestion_setting_attr *)data;
3427         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3428         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3429         struct opa_congestion_setting_entry_shadow *entries;
3430         int i;
3431
3432         /*
3433          * Save details from packet into the ppd.  Hold the cc_state_lock so
3434          * our information is consistent with anyone trying to apply the state.
3435          */
3436         spin_lock(&ppd->cc_state_lock);
3437         ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3438
3439         entries = ppd->congestion_entries;
3440         for (i = 0; i < OPA_MAX_SLS; i++) {
3441                 entries[i].ccti_increase = p->entries[i].ccti_increase;
3442                 entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3443                 entries[i].trigger_threshold =
3444                         p->entries[i].trigger_threshold;
3445                 entries[i].ccti_min = p->entries[i].ccti_min;
3446         }
3447         spin_unlock(&ppd->cc_state_lock);
3448
3449         /* now apply the information */
3450         apply_cc_state(ppd);
3451
3452         return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3453                                            resp_len);
3454 }
3455
3456 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3457                                         u8 *data, struct ib_device *ibdev,
3458                                         u8 port, u32 *resp_len)
3459 {
3460         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3461         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3462         struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3463         s64 ts;
3464         int i;
3465
3466         if (am != 0) {
3467                 smp->status |= IB_SMP_INVALID_FIELD;
3468                 return reply((struct ib_mad_hdr *)smp);
3469         }
3470
3471         spin_lock_irq(&ppd->cc_log_lock);
3472
3473         cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3474         cong_log->congestion_flags = 0;
3475         cong_log->threshold_event_counter =
3476                 cpu_to_be16(ppd->threshold_event_counter);
3477         memcpy(cong_log->threshold_cong_event_map,
3478                ppd->threshold_cong_event_map,
3479                sizeof(cong_log->threshold_cong_event_map));
3480         /* keep timestamp in units of 1.024 usec */
3481         ts = ktime_to_ns(ktime_get()) / 1024;
3482         cong_log->current_time_stamp = cpu_to_be32(ts);
3483         for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3484                 struct opa_hfi1_cong_log_event_internal *cce =
3485                         &ppd->cc_events[ppd->cc_mad_idx++];
3486                 if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3487                         ppd->cc_mad_idx = 0;
3488                 /*
3489                  * Entries which are older than twice the time
3490                  * required to wrap the counter are supposed to
3491                  * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3492                  */
3493                 if ((u64)(ts - cce->timestamp) > (2ULL * UINT_MAX))
3494                         continue;
3495                 memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3496                 memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3497                        &cce->rqpn, 3);
3498                 cong_log->events[i].sl_svc_type_cn_entry =
3499                         ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3500                 cong_log->events[i].remote_lid_cn_entry =
3501                         cpu_to_be32(cce->rlid);
3502                 cong_log->events[i].timestamp_cn_entry =
3503                         cpu_to_be32(cce->timestamp);
3504         }
3505
3506         /*
3507          * Reset threshold_cong_event_map and threshold_event_counter
3508          * to 0 when the log is read.
3509          */
3510         memset(ppd->threshold_cong_event_map, 0x0,
3511                sizeof(ppd->threshold_cong_event_map));
3512         ppd->threshold_event_counter = 0;
3513
3514         spin_unlock_irq(&ppd->cc_log_lock);
3515
3516         if (resp_len)
3517                 *resp_len += sizeof(struct opa_hfi1_cong_log);
3518
3519         return reply((struct ib_mad_hdr *)smp);
3520 }
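
/*
 * Editorial sketch (not part of the driver): cc_events is consumed above
 * as a fixed-size ring -- the read starts at cc_mad_idx (the oldest
 * entry) and wraps exactly once around OPA_CONG_LOG_ELEMS slots.  The
 * index advance in isolation (cong_log_next_idx is a hypothetical
 * helper):
 */
static inline int cong_log_next_idx(int idx)
{
        return (idx + 1 == OPA_CONG_LOG_ELEMS) ? 0 : idx + 1;
}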
3521
3522 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3523                                    struct ib_device *ibdev, u8 port,
3524                                    u32 *resp_len)
3525 {
3526         struct ib_cc_table_attr *cc_table_attr =
3527                 (struct ib_cc_table_attr *)data;
3528         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3529         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3530         u32 start_block = OPA_AM_START_BLK(am);
3531         u32 n_blocks = OPA_AM_NBLK(am);
3532         struct ib_cc_table_entry_shadow *entries;
3533         int i, j;
3534         u32 sentry, eentry;
3535         struct cc_state *cc_state;
3536
3537         /* sanity check n_blocks, start_block */
3538         if (n_blocks == 0 ||
3539             start_block + n_blocks > ppd->cc_max_table_entries) {
3540                 smp->status |= IB_SMP_INVALID_FIELD;
3541                 return reply((struct ib_mad_hdr *)smp);
3542         }
3543
3544         rcu_read_lock();
3545
3546         cc_state = get_cc_state(ppd);
3547
3548         if (!cc_state) {
3549                 rcu_read_unlock();
3550                 return reply((struct ib_mad_hdr *)smp);
3551         }
3552
3553         sentry = start_block * IB_CCT_ENTRIES;
3554         eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3555
3556         cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3557
3558         entries = cc_state->cct.entries;
3559
3560         /* return n_blocks, though the last block may not be full */
3561         for (j = 0, i = sentry; i < eentry; j++, i++)
3562                 cc_table_attr->ccti_entries[j].entry =
3563                         cpu_to_be16(entries[i].entry);
3564
3565         rcu_read_unlock();
3566
3567         if (resp_len)
3568                 *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3569
3570         return reply((struct ib_mad_hdr *)smp);
3571 }
3572
3573 void cc_state_reclaim(struct rcu_head *rcu)
3574 {
3575         struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
3576
3577         kfree(cc_state);
3578 }
3579
3580 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3581                                    struct ib_device *ibdev, u8 port,
3582                                    u32 *resp_len)
3583 {
3584         struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3585         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3586         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3587         u32 start_block = OPA_AM_START_BLK(am);
3588         u32 n_blocks = OPA_AM_NBLK(am);
3589         struct ib_cc_table_entry_shadow *entries;
3590         int i, j;
3591         u32 sentry, eentry;
3592         u16 ccti_limit;
3593
3594         /* sanity check n_blocks, start_block */
3595         if (n_blocks == 0 ||
3596             start_block + n_blocks > ppd->cc_max_table_entries) {
3597                 smp->status |= IB_SMP_INVALID_FIELD;
3598                 return reply((struct ib_mad_hdr *)smp);
3599         }
3600
3601         sentry = start_block * IB_CCT_ENTRIES;
3602         eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3603                  (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3604
3605         /* sanity check ccti_limit */
3606         ccti_limit = be16_to_cpu(p->ccti_limit);
3607         if (ccti_limit + 1 > eentry) {
3608                 smp->status |= IB_SMP_INVALID_FIELD;
3609                 return reply((struct ib_mad_hdr *)smp);
3610         }
3611
3612         /*
3613          * Save details from packet into the ppd.  Hold the cc_state_lock so
3614          * our information is consistent with anyone trying to apply the state.
3615          */
3616         spin_lock(&ppd->cc_state_lock);
3617         ppd->total_cct_entry = ccti_limit + 1;
3618         entries = ppd->ccti_entries;
3619         for (j = 0, i = sentry; i < eentry; j++, i++)
3620                 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3621         spin_unlock(&ppd->cc_state_lock);
3622
3623         /* now apply the information */
3624         apply_cc_state(ppd);
3625
3626         return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3627 }
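
/*
 * Editorial worked example (not part of the driver) of the block
 * arithmetic above, assuming IB_CCT_ENTRIES is 64.  With
 * start_block == 1, n_blocks == 2 and ccti_limit == 130:
 *
 *   sentry = 1 * 64                            = 64
 *   eentry = 64 + (2 - 1) * 64 + 130 % 64 + 1  = 131
 *
 * so entries 64..130 are written: the second block in full plus the
 * first three entries of the third.
 */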
3628
3629 struct opa_led_info {
3630         __be32 rsvd_led_mask;
3631         __be32 rsvd;
3632 };
3633
3634 #define OPA_LED_SHIFT   31
3635 #define OPA_LED_MASK    BIT(OPA_LED_SHIFT)
3636
3637 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3638                                    struct ib_device *ibdev, u8 port,
3639                                    u32 *resp_len)
3640 {
3641         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3642         struct hfi1_pportdata *ppd = dd->pport;
3643         struct opa_led_info *p = (struct opa_led_info *)data;
3644         u32 nport = OPA_AM_NPORT(am);
3645         u32 is_beaconing_active;
3646
3647         if (nport != 1) {
3648                 smp->status |= IB_SMP_INVALID_FIELD;
3649                 return reply((struct ib_mad_hdr *)smp);
3650         }
3651
3652         /*
3653          * This pairs with the memory barrier in hfi1_start_led_override to
3654          * ensure that we read the correct state of LED beaconing represented
3655          * by led_override_timer_active
3656          */
3657         smp_rmb();
3658         is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3659         p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3660
3661         if (resp_len)
3662                 *resp_len += sizeof(struct opa_led_info);
3663
3664         return reply((struct ib_mad_hdr *)smp);
3665 }
3666
3667 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3668                                    struct ib_device *ibdev, u8 port,
3669                                    u32 *resp_len)
3670 {
3671         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3672         struct opa_led_info *p = (struct opa_led_info *)data;
3673         u32 nport = OPA_AM_NPORT(am);
3674         int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3675
3676         if (nport != 1) {
3677                 smp->status |= IB_SMP_INVALID_FIELD;
3678                 return reply((struct ib_mad_hdr *)smp);
3679         }
3680
3681         if (on)
3682                 hfi1_start_led_override(dd->pport, 2000, 1500);
3683         else
3684                 shutdown_led_override(dd->pport);
3685
3686         return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3687 }
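
/*
 * Editorial sketch (not part of the driver): the LED attribute carries
 * the beaconing flag in the top bit of rsvd_led_mask; both the Get and
 * Set handlers above isolate it with OPA_LED_MASK (led_requested_on is
 * a hypothetical helper):
 */
static inline int led_requested_on(const struct opa_led_info *p)
{
        return !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
}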
3688
3689 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3690                             u8 *data, struct ib_device *ibdev, u8 port,
3691                             u32 *resp_len)
3692 {
3693         int ret;
3694         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3695
3696         switch (attr_id) {
3697         case IB_SMP_ATTR_NODE_DESC:
3698                 ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3699                                               resp_len);
3700                 break;
3701         case IB_SMP_ATTR_NODE_INFO:
3702                 ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3703                                               resp_len);
3704                 break;
3705         case IB_SMP_ATTR_PORT_INFO:
3706                 ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3707                                               resp_len);
3708                 break;
3709         case IB_SMP_ATTR_PKEY_TABLE:
3710                 ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3711                                                resp_len);
3712                 break;
3713         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3714                 ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3715                                               resp_len);
3716                 break;
3717         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3718                 ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3719                                               resp_len);
3720                 break;
3721         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3722                 ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3723                                                resp_len);
3724                 break;
3725         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3726                 ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3727                                                 resp_len);
3728                 break;
3729         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3730                 ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3731                                          resp_len);
3732                 break;
3733         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3734                 ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3735                                          resp_len);
3736                 break;
3737         case OPA_ATTRIB_ID_CABLE_INFO:
3738                 ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3739                                                 resp_len);
3740                 break;
3741         case IB_SMP_ATTR_VL_ARB_TABLE:
3742                 ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3743                                             resp_len);
3744                 break;
3745         case OPA_ATTRIB_ID_CONGESTION_INFO:
3746                 ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3747                                                resp_len);
3748                 break;
3749         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3750                 ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3751                                                   port, resp_len);
3752                 break;
3753         case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3754                 ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3755                                                    port, resp_len);
3756                 break;
3757         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3758                 ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3759                                               resp_len);
3760                 break;
3761         case IB_SMP_ATTR_LED_INFO:
3762                 ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3763                                               resp_len);
3764                 break;
3765         case IB_SMP_ATTR_SM_INFO:
3766                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3767                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3768                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3769                         return IB_MAD_RESULT_SUCCESS;
3770                 /* FALLTHROUGH */
3771         default:
3772                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3773                 ret = reply((struct ib_mad_hdr *)smp);
3774                 break;
3775         }
3776         return ret;
3777 }
3778
3779 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3780                             u8 *data, struct ib_device *ibdev, u8 port,
3781                             u32 *resp_len)
3782 {
3783         int ret;
3784         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3785
3786         switch (attr_id) {
3787         case IB_SMP_ATTR_PORT_INFO:
3788                 ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3789                                               resp_len);
3790                 break;
3791         case IB_SMP_ATTR_PKEY_TABLE:
3792                 ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3793                                                resp_len);
3794                 break;
3795         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3796                 ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3797                                               resp_len);
3798                 break;
3799         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3800                 ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3801                                               resp_len);
3802                 break;
3803         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3804                 ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3805                                                resp_len);
3806                 break;
3807         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3808                 ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3809                                                 resp_len);
3810                 break;
3811         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3812                 ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3813                                          resp_len);
3814                 break;
3815         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3816                 ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3817                                          resp_len);
3818                 break;
3819         case IB_SMP_ATTR_VL_ARB_TABLE:
3820                 ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3821                                             resp_len);
3822                 break;
3823         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3824                 ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3825                                                   port, resp_len);
3826                 break;
3827         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3828                 ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3829                                               resp_len);
3830                 break;
3831         case IB_SMP_ATTR_LED_INFO:
3832                 ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3833                                               resp_len);
3834                 break;
3835         case IB_SMP_ATTR_SM_INFO:
3836                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3837                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3838                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3839                         return IB_MAD_RESULT_SUCCESS;
3840                 /* FALLTHROUGH */
3841         default:
3842                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3843                 ret = reply((struct ib_mad_hdr *)smp);
3844                 break;
3845         }
3846         return ret;
3847 }
3848
3849 static inline void set_aggr_error(struct opa_aggregate *ag)
3850 {
3851         ag->err_reqlength |= cpu_to_be16(0x8000);
3852 }
3853
3854 static int subn_get_opa_aggregate(struct opa_smp *smp,
3855                                   struct ib_device *ibdev, u8 port,
3856                                   u32 *resp_len)
3857 {
3858         int i;
3859         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3860         u8 *next_smp = opa_get_smp_data(smp);
3861
3862         if (num_attr < 1 || num_attr > 117) {
3863                 smp->status |= IB_SMP_INVALID_FIELD;
3864                 return reply((struct ib_mad_hdr *)smp);
3865         }
3866
3867         for (i = 0; i < num_attr; i++) {
3868                 struct opa_aggregate *agg;
3869                 size_t agg_data_len;
3870                 size_t agg_size;
3871                 u32 am;
3872
3873                 agg = (struct opa_aggregate *)next_smp;
3874                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3875                 agg_size = sizeof(*agg) + agg_data_len;
3876                 am = be32_to_cpu(agg->attr_mod);
3877
3878                 *resp_len += agg_size;
3879
3880                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3881                         smp->status |= IB_SMP_INVALID_FIELD;
3882                         return reply((struct ib_mad_hdr *)smp);
3883                 }
3884
3885                 /* zero the payload for this segment */
3886                 memset(next_smp + sizeof(*agg), 0, agg_data_len);
3887
3888                 (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3889                                         ibdev, port, NULL);
3890                 if (smp->status & ~IB_SMP_DIRECTION) {
3891                         set_aggr_error(agg);
3892                         return reply((struct ib_mad_hdr *)smp);
3893                 }
3894                 next_smp += agg_size;
3895         }
3896
3897         return reply((struct ib_mad_hdr *)smp);
3898 }
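
/*
 * Editorial sketch (not part of the driver): both aggregate walkers
 * decode err_reqlength the same way -- bit 15 is the per-attribute error
 * flag (set via set_aggr_error()) and the low 7 bits give the payload
 * length in 8-byte units (aggr_payload_bytes is a hypothetical helper):
 */
static inline size_t aggr_payload_bytes(const struct opa_aggregate *agg)
{
        return (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
}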
3899
3900 static int subn_set_opa_aggregate(struct opa_smp *smp,
3901                                   struct ib_device *ibdev, u8 port,
3902                                   u32 *resp_len)
3903 {
3904         int i;
3905         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3906         u8 *next_smp = opa_get_smp_data(smp);
3907
3908         if (num_attr < 1 || num_attr > 117) {
3909                 smp->status |= IB_SMP_INVALID_FIELD;
3910                 return reply((struct ib_mad_hdr *)smp);
3911         }
3912
3913         for (i = 0; i < num_attr; i++) {
3914                 struct opa_aggregate *agg;
3915                 size_t agg_data_len;
3916                 size_t agg_size;
3917                 u32 am;
3918
3919                 agg = (struct opa_aggregate *)next_smp;
3920                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3921                 agg_size = sizeof(*agg) + agg_data_len;
3922                 am = be32_to_cpu(agg->attr_mod);
3923
3924                 *resp_len += agg_size;
3925
3926                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3927                         smp->status |= IB_SMP_INVALID_FIELD;
3928                         return reply((struct ib_mad_hdr *)smp);
3929                 }
3930
3931                 (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3932                                         ibdev, port, NULL);
3933                 if (smp->status & ~IB_SMP_DIRECTION) {
3934                         set_aggr_error(agg);
3935                         return reply((struct ib_mad_hdr *)smp);
3936                 }
3937                 next_smp += agg_size;
3938         }
3939
3940         return reply((struct ib_mad_hdr *)smp);
3941 }
3942
3943 /*
3944  * OPAv1 specifies that, on the transition to link up, these counters
3945  * are cleared:
3946  *   PortRcvErrors [*]
3947  *   LinkErrorRecovery
3948  *   LocalLinkIntegrityErrors
3949  *   ExcessiveBufferOverruns [*]
3950  *
3951  * [*] Error info associated with these counters is retained, but the
3952  * error info status is reset to 0.
3953  */
3954 void clear_linkup_counters(struct hfi1_devdata *dd)
3955 {
3956         /* PortRcvErrors */
3957         write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3958         dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3959         /* LinkErrorRecovery */
3960         write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3961         write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3962         /* LocalLinkIntegrityErrors */
3963         write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3964         write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3965         /* ExcessiveBufferOverruns */
3966         write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3967         dd->rcv_ovfl_cnt = 0;
3968         dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3969 }
3970
3971 /*
3972  * is_local_mad() returns 1 if 'mad' was sent from, and is destined to,
3973  * the local node; 0 otherwise.
3974  */
3975 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3976                         const struct ib_wc *in_wc)
3977 {
3978         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3979         const struct opa_smp *smp = (const struct opa_smp *)mad;
3980
3981         if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3982                 return (smp->hop_cnt == 0 &&
3983                         smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3984                         smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3985         }
3986
3987         return (in_wc->slid == ppd->lid);
3988 }
3989
3990 /*
3991  * opa_local_smp_check() should only be called on MADs for which
3992  * is_local_mad() returns true. It applies the SMP checks that are
3993  * specific to SMPs which are sent from, and destined to, this node.
3994  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3995  * otherwise.
3996  *
3997  * SMPs which arrive from other nodes are instead checked by
3998  * opa_smp_check().
3999  */
4000 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4001                                const struct ib_wc *in_wc)
4002 {
4003         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4004         u16 slid = in_wc->slid;
4005         u16 pkey;
4006
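             /* reject an out-of-range pkey index before indexing the table */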
4007         if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4008                 return 1;
4009
4010         pkey = ppd->pkeys[in_wc->pkey_index];
4011         /*
4012          * We need to do the "node-local" checks specified in OPAv1,
4013          * rev 0.90, section 9.10.26, which are:
4014          *   - pkey is 0x7fff (LIM_MGMT_P_KEY) or 0xffff (FULL_MGMT_P_KEY)
4015          *   - Source QPN == 0 || Destination QPN == 0
4016          *   - the MAD header's management class is either
4017          *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4018          *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4019          *   - SLID != 0
4020          *
4021          * However, we know (and so don't need to check again) that,
4022          * for local SMPs, the MAD stack passes MADs with:
4023          *   - Source QPN of 0
4024          *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4025          *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4026          *     our own port's lid
4027          *
4028          */
4029         if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4030                 return 0;
4031         ingress_pkey_table_fail(ppd, pkey, slid);
4032         return 1;
4033 }
4034
4035 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4036                             u8 port, const struct opa_mad *in_mad,
4037                             struct opa_mad *out_mad,
4038                             u32 *resp_len)
4039 {
4040         struct opa_smp *smp = (struct opa_smp *)out_mad;
4041         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4042         u8 *data;
4043         u32 am;
4044         __be16 attr_id;
4045         int ret;
4046
4047         *out_mad = *in_mad;
4048         data = opa_get_smp_data(smp);
4049
4050         am = be32_to_cpu(smp->attr_mod);
4051         attr_id = smp->attr_id;
4052         if (smp->class_version != OPA_SMI_CLASS_VERSION) {
4053                 smp->status |= IB_SMP_UNSUP_VERSION;
4054                 ret = reply((struct ib_mad_hdr *)smp);
4055                 return ret;
4056         }
4057         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4058                          smp->route.dr.dr_slid, smp->route.dr.return_path,
4059                          smp->hop_cnt);
4060         if (ret) {
4061                 u32 port_num = be32_to_cpu(smp->attr_mod);
4062
4063                 /*
4064                  * The M_Key check has already failed on the receiving
4065                  * port.  If this is a get/set of PortInfo aimed at
4066                  * another port, run the check against that port as
4067                  * well, so that its error counters are incremented
4068                  * when the M_Key fails to match on *both* ports.
4069                  */
4070                 if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4071                     (smp->method == IB_MGMT_METHOD_GET ||
4072                      smp->method == IB_MGMT_METHOD_SET) &&
4073                     port_num && port_num <= ibdev->phys_port_cnt &&
4074                     port != port_num)
4075                         (void)check_mkey(to_iport(ibdev, port_num),
4076                                           (struct ib_mad_hdr *)smp, 0,
4077                                           smp->mkey, smp->route.dr.dr_slid,
4078                                           smp->route.dr.return_path,
4079                                           smp->hop_cnt);
4080                 ret = IB_MAD_RESULT_FAILURE;
4081                 return ret;
4082         }
4083
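             /*
              * Responses begin at the OPA SMP header size; the attribute
              * handlers below add the size of their payload to *resp_len.
              */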
4084         *resp_len = opa_get_smp_header_size(smp);
4085
4086         switch (smp->method) {
4087         case IB_MGMT_METHOD_GET:
4088                 switch (attr_id) {
4089                 case OPA_ATTRIB_ID_AGGREGATE:
4090                         ret = subn_get_opa_aggregate(smp, ibdev, port,
4091                                                      resp_len);
4092                         break;
4093                 default:
4094                         clear_opa_smp_data(smp);
4095                         ret = subn_get_opa_sma(attr_id, smp, am, data,
4096                                                ibdev, port, resp_len);
4097                         break;
4098                 }
4099                 break;
4100         case IB_MGMT_METHOD_SET:
4101                 switch (attr_id) {
4102                 case OPA_ATTRIB_ID_AGGREGATE:
4103                         ret = subn_set_opa_aggregate(smp, ibdev, port,
4104                                                      resp_len);
4105                         break;
4106                 default:
4107                         ret = subn_set_opa_sma(attr_id, smp, am, data,
4108                                                ibdev, port, resp_len);
4109                         break;
4110                 }
4111                 break;
4112         case IB_MGMT_METHOD_TRAP:
4113         case IB_MGMT_METHOD_REPORT:
4114         case IB_MGMT_METHOD_REPORT_RESP:
4115         case IB_MGMT_METHOD_GET_RESP:
4116                 /*
4117                  * The ib_mad module will call us to process responses
4118                  * before checking for other consumers.
4119                  * Just tell the caller to process it normally.
4120                  */
4121                 ret = IB_MAD_RESULT_SUCCESS;
4122                 break;
4123         default:
4124                 smp->status |= IB_SMP_UNSUP_METHOD;
4125                 ret = reply((struct ib_mad_hdr *)smp);
4126                 break;
4127         }
4128
4129         return ret;
4130 }
4131
4132 static int process_subn(struct ib_device *ibdev, int mad_flags,
4133                         u8 port, const struct ib_mad *in_mad,
4134                         struct ib_mad *out_mad)
4135 {
4136         struct ib_smp *smp = (struct ib_smp *)out_mad;
4137         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4138         int ret;
4139
4140         *out_mad = *in_mad;
4141         if (smp->class_version != 1) {
4142                 smp->status |= IB_SMP_UNSUP_VERSION;
4143                 ret = reply((struct ib_mad_hdr *)smp);
4144                 return ret;
4145         }
4146
4147         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4148                          smp->mkey, (__force __be32)smp->dr_slid,
4149                          smp->return_path, smp->hop_cnt);
4150         if (ret) {
4151                 u32 port_num = be32_to_cpu(smp->attr_mod);
4152
4153                 /*
4154                  * The M_Key check has already failed on the receiving
4155                  * port.  If this is a get/set of PortInfo aimed at
4156                  * another port, run the check against that port as
4157                  * well, so that its error counters are incremented
4158                  * when the M_Key fails to match on *both* ports.
4159                  */
4160                 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4161                     (smp->method == IB_MGMT_METHOD_GET ||
4162                      smp->method == IB_MGMT_METHOD_SET) &&
4163                     port_num && port_num <= ibdev->phys_port_cnt &&
4164                     port != port_num)
4165                         (void)check_mkey(to_iport(ibdev, port_num),
4166                                          (struct ib_mad_hdr *)smp, 0,
4167                                          smp->mkey,
4168                                          (__force __be32)smp->dr_slid,
4169                                          smp->return_path, smp->hop_cnt);
4170                 ret = IB_MAD_RESULT_FAILURE;
4171                 return ret;
4172         }
4173
4174         switch (smp->method) {
4175         case IB_MGMT_METHOD_GET:
4176                 switch (smp->attr_id) {
4177                 case IB_SMP_ATTR_NODE_INFO:
4178                         ret = subn_get_nodeinfo(smp, ibdev, port);
4179                         break;
4180                 default:
4181                         smp->status |= IB_SMP_UNSUP_METH_ATTR;
4182                         ret = reply((struct ib_mad_hdr *)smp);
4183                         break;
4184                 }
4185                 break;
             case IB_MGMT_METHOD_TRAP:
             case IB_MGMT_METHOD_GET_RESP:
                     /* let the ib_mad module process responses normally */
                     ret = IB_MAD_RESULT_SUCCESS;
                     break;
             default:
                     /* don't fall out of the switch with ret uninitialized */
                     smp->status |= IB_SMP_UNSUP_METHOD;
                     ret = reply((struct ib_mad_hdr *)smp);
                     break;
4186         }
4187
4188         return ret;
4189 }
4190
4191 static int process_perf(struct ib_device *ibdev, u8 port,
4192                         const struct ib_mad *in_mad,
4193                         struct ib_mad *out_mad)
4194 {
4195         struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4196         struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4197                                                 &pmp->data;
4198         int ret = IB_MAD_RESULT_FAILURE;
4199
4200         *out_mad = *in_mad;
4201         if (pmp->mad_hdr.class_version != 1) {
4202                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4203                 ret = reply((struct ib_mad_hdr *)pmp);
4204                 return ret;
4205         }
4206
4207         switch (pmp->mad_hdr.method) {
4208         case IB_MGMT_METHOD_GET:
4209                 switch (pmp->mad_hdr.attr_id) {
4210                 case IB_PMA_PORT_COUNTERS:
4211                         ret = pma_get_ib_portcounters(pmp, ibdev, port);
4212                         break;
4213                 case IB_PMA_PORT_COUNTERS_EXT:
4214                         ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4215                         break;
4216                 case IB_PMA_CLASS_PORT_INFO:
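                             /* advertise the 64-bit extended port counters */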
4217                         cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4218                         ret = reply((struct ib_mad_hdr *)pmp);
4219                         break;
4220                 default:
4221                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4222                         ret = reply((struct ib_mad_hdr *)pmp);
4223                         break;
4224                 }
4225                 break;
4226
4227         case IB_MGMT_METHOD_SET:
4228                 if (pmp->mad_hdr.attr_id) {
4229                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4230                         ret = reply((struct ib_mad_hdr *)pmp);
4231                 }
4232                 break;
4233
4234         case IB_MGMT_METHOD_TRAP:
4235         case IB_MGMT_METHOD_GET_RESP:
4236                 /*
4237                  * The ib_mad module will call us to process responses
4238                  * before checking for other consumers.
4239                  * Just tell the caller to process it normally.
4240                  */
4241                 ret = IB_MAD_RESULT_SUCCESS;
4242                 break;
4243
4244         default:
4245                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4246                 ret = reply((struct ib_mad_hdr *)pmp);
4247                 break;
4248         }
4249
4250         return ret;
4251 }
4252
4253 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4254                             const struct opa_mad *in_mad,
4255                             struct opa_mad *out_mad, u32 *resp_len)
4256 {
4257         struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4258         int ret;
4259
4260         *out_mad = *in_mad;
4261
4262         if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
4263                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4264                 return reply((struct ib_mad_hdr *)pmp);
4265         }
4266
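             /*
              * Responses begin with the bare MAD header; each PMA handler
              * below adds its attribute's payload to *resp_len.
              */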
4267         *resp_len = sizeof(pmp->mad_hdr);
4268
4269         switch (pmp->mad_hdr.method) {
4270         case IB_MGMT_METHOD_GET:
4271                 switch (pmp->mad_hdr.attr_id) {
4272                 case IB_PMA_CLASS_PORT_INFO:
4273                         ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4274                         break;
4275                 case OPA_PM_ATTRIB_ID_PORT_STATUS:
4276                         ret = pma_get_opa_portstatus(pmp, ibdev, port,
4277                                                      resp_len);
4278                         break;
4279                 case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4280                         ret = pma_get_opa_datacounters(pmp, ibdev, port,
4281                                                        resp_len);
4282                         break;
4283                 case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4284                         ret = pma_get_opa_porterrors(pmp, ibdev, port,
4285                                                      resp_len);
4286                         break;
4287                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4288                         ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4289                                                     resp_len);
4290                         break;
4291                 default:
4292                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4293                         ret = reply((struct ib_mad_hdr *)pmp);
4294                         break;
4295                 }
4296                 break;
4297
4298         case IB_MGMT_METHOD_SET:
4299                 switch (pmp->mad_hdr.attr_id) {
4300                 case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4301                         ret = pma_set_opa_portstatus(pmp, ibdev, port,
4302                                                      resp_len);
4303                         break;
4304                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4305                         ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4306                                                     resp_len);
4307                         break;
4308                 default:
4309                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4310                         ret = reply((struct ib_mad_hdr *)pmp);
4311                         break;
4312                 }
4313                 break;
4314
4315         case IB_MGMT_METHOD_TRAP:
4316         case IB_MGMT_METHOD_GET_RESP:
4317                 /*
4318                  * The ib_mad module will call us to process responses
4319                  * before checking for other consumers.
4320                  * Just tell the caller to process it normally.
4321                  */
4322                 ret = IB_MAD_RESULT_SUCCESS;
4323                 break;
4324
4325         default:
4326                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4327                 ret = reply((struct ib_mad_hdr *)pmp);
4328                 break;
4329         }
4330
4331         return ret;
4332 }
4333
4334 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4335                                 u8 port, const struct ib_wc *in_wc,
4336                                 const struct ib_grh *in_grh,
4337                                 const struct opa_mad *in_mad,
4338                                 struct opa_mad *out_mad, size_t *out_mad_size,
4339                                 u16 *out_mad_pkey_index)
4340 {
4341         int ret;
4342         int pkey_idx;
4343         u32 resp_len = 0;
4344         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4345
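             /*
              * Replies are sent with the limited management pkey; look up
              * its index so it can be returned in *out_mad_pkey_index.
              */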
4346         pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4347         if (pkey_idx < 0) {
4348                 pr_warn("failed to find limited mgmt pkey, defaulting to 0x%x\n",
4349                         hfi1_get_pkey(ibp, 1));
4350                 pkey_idx = 1;
4351         }
4352         *out_mad_pkey_index = (u16)pkey_idx;
4353
4354         switch (in_mad->mad_hdr.mgmt_class) {
4355         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4356         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4357                 if (is_local_mad(ibp, in_mad, in_wc)) {
4358                         ret = opa_local_smp_check(ibp, in_wc);
4359                         if (ret)
4360                                 return IB_MAD_RESULT_FAILURE;
4361                 }
4362                 ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4363                                        out_mad, &resp_len);
4364                 goto bail;
4365         case IB_MGMT_CLASS_PERF_MGMT:
4366                 ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4367                                        &resp_len);
4368                 goto bail;
4369
4370         default:
4371                 ret = IB_MAD_RESULT_SUCCESS;
4372         }
4373
4374 bail:
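             /*
              * Reply sizes are the accumulated response length padded up to
              * a multiple of 8 bytes; e.g. a resp_len of 91 (an illustrative
              * value) yields an out_mad_size of 96.
              */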
4375         if (ret & IB_MAD_RESULT_REPLY)
4376                 *out_mad_size = round_up(resp_len, 8);
4377         else if (ret & IB_MAD_RESULT_SUCCESS)
4378                 *out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4379
4380         return ret;
4381 }
4382
4383 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4384                                const struct ib_wc *in_wc,
4385                                const struct ib_grh *in_grh,
4386                                const struct ib_mad *in_mad,
4387                                struct ib_mad *out_mad)
4388 {
4389         int ret;
4390
4391         switch (in_mad->mad_hdr.mgmt_class) {
4392         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4393         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4394                 ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4395                 break;
4396         case IB_MGMT_CLASS_PERF_MGMT:
4397                 ret = process_perf(ibdev, port, in_mad, out_mad);
4398                 break;
4399         default:
4400                 ret = IB_MAD_RESULT_SUCCESS;
4401                 break;
4402         }
4403
4404         return ret;
4405 }
4406
4407 /**
4408  * hfi1_process_mad - process an incoming MAD packet
4409  * @ibdev: the infiniband device this packet came in on
4410  * @mad_flags: MAD flags
4411  * @port: the port number this packet came in on
4412  * @in_wc: the work completion entry for this packet
4413  * @in_grh: the global route header for this packet
4414  * @in_mad: the incoming MAD
4415  * @out_mad: any outgoing MAD reply
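      * @in_mad_size: size of the incoming MAD buffer
      * @out_mad_size: on return, the size of the outgoing MAD reply
      * @out_mad_pkey_index: on return, the pkey index to use for the reply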
4416  *
4417  * Returns a mask of IB_MAD_RESULT_* flags.  A bare IB_MAD_RESULT_SUCCESS
4418  * means this is a MAD that we are not interested in processing.
4419  *
4420  * Note that the verbs framework has already done the MAD sanity checks,
4421  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4422  * MADs.
4423  *
4424  * This is called by the ib_mad module.
4425  */
4426 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4427                      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4428                      const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4429                      struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4430                      u16 *out_mad_pkey_index)
4431 {
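             /*
              * Dispatch on the MAD base version: jumbo 2048-byte OPA MADs
              * vs. traditional 256-byte IB MADs.
              */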
4432         switch (in_mad->base_version) {
4433         case OPA_MGMT_BASE_VERSION:
4434                 if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4435                         dev_err(ibdev->dma_device, "invalid in_mad_size\n");
4436                         return IB_MAD_RESULT_FAILURE;
4437                 }
4438                 return hfi1_process_opa_mad(ibdev, mad_flags, port,
4439                                             in_wc, in_grh,
4440                                             (struct opa_mad *)in_mad,
4441                                             (struct opa_mad *)out_mad,
4442                                             out_mad_size,
4443                                             out_mad_pkey_index);
4444         case IB_MGMT_BASE_VERSION:
4445                 return hfi1_process_ib_mad(ibdev, mad_flags, port,
4446                                           in_wc, in_grh,
4447                                           (const struct ib_mad *)in_mad,
4448                                           (struct ib_mad *)out_mad);
4449         default:
4450                 break;
4451         }
4452
4453         return IB_MAD_RESULT_FAILURE;
4454 }
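
     /*
      * Sketch (illustrative; assumes the usual verbs wiring elsewhere in
      * this driver): the ib_mad module reaches this entry point through
      * the device's MAD hook, e.g.
      *
      *     ibdev->process_mad = hfi1_process_mad;
      *
      * installed when the device registers its verbs support.
      */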