/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *       Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *      http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h>
#include <asm/div64.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by the BIOS.
 * We currently support devices with only 2 sockets. To support more QPI
 * (QuickPath Interconnect) sockets, just increment this number.
 */
#define MAX_SOCKET_BUSES        2


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)                       \
        edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)               \
        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

        /* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL  0x90
  #define MC_CFG_UNLOCK         0x02
  #define MC_CFG_LOCK           0x00

        /* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL      0x48
#define MC_STATUS       0x4c
#define MC_MAX_DOD      0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1        0x60
  #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)

#define MC_TEST_ERR_RCV0        0x64
  #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_SSRCONTROL           0x48
  #define SSR_MODE_DISABLE      0x00
  #define SSR_MODE_ENABLE       0x01
  #define SSR_MODE_MASK         0x03

#define MC_SCRUB_CONTROL        0x4c
  #define STARTSCRUB            (1 << 24)
  #define SCRUBINTERVAL_MASK    0xffffff

#define MC_COR_ECC_CNT_0        0x80
#define MC_COR_ECC_CNT_1        0x84
#define MC_COR_ECC_CNT_2        0x88
#define MC_COR_ECC_CNT_3        0x8c
#define MC_COR_ECC_CNT_4        0x90
#define MC_COR_ECC_CNT_5        0x94

#define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
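
/*
 * Each MC_COR_ECC_CNT_* register packs two 15-bit corrected-error
 * counters, one per DIMM. A minimal decode sketch, using a made-up
 * register value (0x00050002) rather than real hardware output:
 *
 *      u32 rcv;
 *      pci_read_config_dword(pdev, MC_COR_ECC_CNT_0, &rcv);
 *      top = DIMM_TOP_COR_ERR(rcv);    // -> 0x0005 (5 errors)
 *      bot = DIMM_BOT_COR_ERR(rcv);    // -> 0x0002 (2 errors)
 */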


        /* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT           (1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
  #define QUAD_RANK_PRESENT             (1 << 22)
  #define REGISTERED_DIMM               (1 << 15)

#define MC_CHANNEL_MAPPER       0x60
  #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
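
/*
 * Per the two macros above, each channel occupies a 6-bit field in
 * MC_CHANNEL_MAPPER: bits [2:0] select the write path and bits [5:3]
 * the read path, with the raw value being the channel number plus one.
 * Illustrative (not datasheet-sampled) example for r = 0x09:
 *
 *      RDLCH(0x09, 0) == ((0x09 >> 3) & 0x07) - 1 == 0
 *      WRLCH(0x09, 0) == (0x09 & 0x07) - 1        == 0
 */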

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK             0xffff

#define MC_CHANNEL_ADDR_MATCH   0xf0
#define MC_CHANNEL_ERROR_MASK   0xf8
#define MC_CHANNEL_ERROR_INJECT 0xfc
  #define INJECT_ADDR_PARITY    0x10
  #define INJECT_ECC            0x08
  #define MASK_CACHELINE        0x06
  #define MASK_FULL_CACHELINE   0x06
  #define MASK_MSB32_CACHELINE  0x04
  #define MASK_LSB32_CACHELINE  0x02
  #define NO_MASK_CACHELINE     0x00
  #define REPEAT_EN             0x01

        /* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT         0x7c

#define MC_SAG_CH_0     0x80
#define MC_SAG_CH_1     0x84
#define MC_SAG_CH_2     0x88
#define MC_SAG_CH_3     0x8c
#define MC_SAG_CH_4     0x90
#define MC_SAG_CH_5     0x94
#define MC_SAG_CH_6     0x98
#define MC_SAG_CH_7     0x9c

#define MC_RIR_LIMIT_CH_0       0x40
#define MC_RIR_LIMIT_CH_1       0x44
#define MC_RIR_LIMIT_CH_2       0x48
#define MC_RIR_LIMIT_CH_3       0x4C
#define MC_RIR_LIMIT_CH_4       0x50
#define MC_RIR_LIMIT_CH_5       0x54
#define MC_RIR_LIMIT_CH_6       0x58
#define MC_RIR_LIMIT_CH_7       0x5C
#define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)

#define MC_RIR_WAY_CH           0x80
  #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK          0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3             /* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
        u32     mc_control;
        u32     mc_status;
        u32     max_dod;
        u32     ch_map;
};


struct i7core_inject {
        int     enable;

        u32     section;
        u32     type;
        u32     eccmask;

        /* Error address mask */
        int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
        bool            is_3dimms_present;
        bool            is_single_4rank;
        bool            has_4rank;
        u32             dimms;
};

struct pci_id_descr {
        int                     dev;
        int                     func;
        int                     dev_id;
        int                     optional;
};

struct pci_id_table {
        const struct pci_id_descr       *descr;
        int                             n_devs;
};

struct i7core_dev {
        struct list_head        list;
        u8                      socket;
        struct pci_dev          **pdev;
        int                     n_devs;
        struct mem_ctl_info     *mci;
};

struct i7core_pvt {
        struct pci_dev  *pci_noncore;
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];
        int             ce_count_available;

                        /* ECC corrected error counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected error counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        bool            is_registered, enable_scrub;

        /* Fifo double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
        struct mce              mce_outentry[MCE_LOG_LEN];

        /* Fifo in/out counters */
        unsigned                mce_in, mce_out;

        /* Counts errors that were lost because the MCE FIFO overran */
        unsigned                mce_overrun;

        /* DCLK Frequency used for computing scrub rate */
        int                     dclk_freq;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};

#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

                /* Generic Non-core registers */
        /*
         * This is the PCI device used on i7core and on Xeon 35xx (8086:2c41).
         * On Xeon 55xx, however, it has a different id (8086:2c40), so the
         * probing code needs to test for the other address in case this
         * one fails.
         */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

        /*
         * This PCI device has an alternate address on some
         * processors, like the Core i7 860.
         */
        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

                /* Generic Non-core registers */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};

/*
 *      pci_device_id   table for which devices we are looking for
 */
static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};

/****************************************************************************
                        Ancillary status routines
 ****************************************************************************/

        /* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))

        /* MC_STATUS bits */
#define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))

        /* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
        return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
        static int ranks[4] = { 1, 2, 4, -EINVAL };

        return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
        static int banks[4] = { 4, 8, 16, -EINVAL };

        return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
        static int rows[8] = {
                1 << 12, 1 << 13, 1 << 14, 1 << 15,
                1 << 16, -EINVAL, -EINVAL, -EINVAL,
        };

        return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
        static int cols[4] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };
        return cols[col & 0x3];
}
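
/*
 * Worked example of the MC_DOD decode these helpers implement, for the
 * illustrative (not hardware-sampled) register value 0x2a8:
 *
 *      DIMM_PRESENT(0x2a8)             -> 1
 *      numbank(MC_DOD_NUMBANK(0x2a8))  -> 8 banks
 *      numrank(MC_DOD_NUMRANK(0x2a8))  -> 2 ranks
 *      numrow(MC_DOD_NUMROW(0x2a8))    -> 1 << 14 rows
 *      numcol(MC_DOD_NUMCOL(0x2a8))    -> 1 << 10 cols
 */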

static struct i7core_dev *get_i7core_dev(u8 socket)
{
        struct i7core_dev *i7core_dev;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                if (i7core_dev->socket == socket)
                        return i7core_dev;
        }

        return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
                                           const struct pci_id_table *table)
{
        struct i7core_dev *i7core_dev;

        i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
        if (!i7core_dev)
                return NULL;

        i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
                                   GFP_KERNEL);
        if (!i7core_dev->pdev) {
                kfree(i7core_dev);
                return NULL;
        }

        i7core_dev->socket = socket;
        i7core_dev->n_devs = table->n_devs;
        list_add_tail(&i7core_dev->list, &i7core_edac_list);

        return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
        list_del(&i7core_dev->list);
        kfree(i7core_dev->pdev);
        kfree(i7core_dev);
}

/****************************************************************************
                        Memory check routines
 ****************************************************************************/

static int get_dimm_config(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, j;
        enum edac_type mode;
        enum mem_type mtype;
        struct dimm_info *dimm;

        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
        if (!pdev)
                return -ENODEV;

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

        debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
                pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
                pvt->info.max_dod, pvt->info.ch_map);

        if (ECC_ENABLED(pvt)) {
                debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
                if (ECCx8(pvt))
                        mode = EDAC_S8ECD8ED;
                else
                        mode = EDAC_S4ECD4ED;
        } else {
                debugf0("ECC disabled\n");
                mode = EDAC_NONE;
        }

        /* FIXME: need to handle the error codes */
        debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
                "x%x x 0x%x\n",
                numdimms(pvt->info.max_dod),
                numrank(pvt->info.max_dod >> 2),
                numbank(pvt->info.max_dod >> 4),
                numrow(pvt->info.max_dod >> 6),
                numcol(pvt->info.max_dod >> 9));

        for (i = 0; i < NUM_CHANS; i++) {
                u32 data, dimm_dod[3], value[8];

                if (!pvt->pci_ch[i][0])
                        continue;

                if (!CH_ACTIVE(pvt, i)) {
                        debugf0("Channel %i is not active\n", i);
                        continue;
                }
                if (CH_DISABLED(pvt, i)) {
                        debugf0("Channel %i is disabled\n", i);
                        continue;
                }

                /* Devices 4-6 function 0 */
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);


                if (data & THREE_DIMMS_PRESENT)
                        pvt->channel[i].is_3dimms_present = true;

                if (data & SINGLE_QUAD_RANK_PRESENT)
                        pvt->channel[i].is_single_4rank = true;

                if (data & QUAD_RANK_PRESENT)
                        pvt->channel[i].has_4rank = true;

                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;

                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
                        "%s%s%s%cDIMMs\n",
                        i,
                        RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                        data,
                        pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
                        pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
                        pvt->channel[i].has_4rank ? "HAS_4R " : "",
                        (data & REGISTERED_DIMM) ? 'R' : 'U');

                for (j = 0; j < 3; j++) {
                        u32 banks, ranks, rows, cols;
                        u32 size, npages;

                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;

                        dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
                                       i, j, 0);
                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

                        /* DDR3 has 8 I/O banks */
                        size = (rows * cols * banks * ranks) >> (20 - 3);
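
                        /*
                         * Worked example with the values decoded above: a
                         * DIMM with 1 << 14 rows, 1 << 10 cols, 8 banks and
                         * 2 ranks gives (2^14 * 2^10 * 8 * 2) >> 17 =
                         * 2048 MiB.
                         */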

                        debugf0("\tdimm %d %d MiB offset: %x, "
                                "bank: %d, rank: %d, row: %#x, col: %#x\n",
                                j, size,
                                RANKOFFSET(dimm_dod[j]),
                                banks, ranks, rows, cols);

                        npages = MiB_TO_PAGES(size);

                        dimm->nr_pages = npages;

                        switch (banks) {
                        case 4:
                                dimm->dtype = DEV_X4;
                                break;
                        case 8:
                                dimm->dtype = DEV_X8;
                                break;
                        case 16:
                                dimm->dtype = DEV_X16;
                                break;
                        default:
                                dimm->dtype = DEV_UNKNOWN;
                        }

                        snprintf(dimm->label, sizeof(dimm->label),
                                 "CPU#%uChannel#%u_DIMM#%u",
                                 pvt->i7core_dev->socket, i, j);
                        dimm->grain = 8;
                        dimm->edac_mode = mode;
                        dimm->mtype = mtype;
                }

                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
                debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
                for (j = 0; j < 8; j++)
                        debugf1("\t\t%#x\t%#x\t%#x\n",
                                (value[j] >> 27) & 0x1,
                                (value[j] >> 24) & 0x7,
                                (value[j] & ((1 << 24) - 1)));
        }

        return 0;
}

/****************************************************************************
                        Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change to an inject parameter is only applied on enable,
   we disable error injection on all write calls to the sysfs nodes that
   control error injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        pvt->inject.enable = 0;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return -ENODEV;

        pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                                MC_CHANNEL_ERROR_INJECT, 0);

        return 0;
}

/*
 * i7core inject.section
 *
 *      accept and store the error injection inject.section value
 *      bit 0 - refers to the lower 32-byte half cacheline
 *      bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
                                           const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 3))
                return -EIO;

        pvt->inject.section = (u32) value;
        return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *      accept and store the error injection inject.type value
 *      bit 0 - repeat enable - Enable error repetition
 *      bit 1 - inject ECC error
 *      bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 7))
                return -EIO;

        pvt->inject.type = (u32) value;
        return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if (rc < 0)
                return -EIO;

        pvt->inject.eccmask = (u32) value;
        return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Sysfs nodes that set the address-match criteria (channel, dimm, rank,
 * bank, page and col) used by the error injection code. Writing "any"
 * to a node makes the corresponding field be ignored when matching.
 */

#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        unsigned long uval;                                     \
        long value;                                             \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtoul(data, 10, &uval);           \
                if ((rc < 0) || (uval >= limit))                \
                        return -EIO;                            \
                value = uval;                                   \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
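
/*
 * Hedged usage sketch: with the standard EDAC sysfs layout of this era,
 * the attributes declared above surface under the "inject_addrmatch"
 * group defined below, e.g. (mc0 is an assumed instance name):
 *
 *      echo 2   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *      echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 */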

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
        u32 read;
        int count;

        debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val);

        for (count = 0; count < 10; count++) {
                if (count)
                        msleep(100);
                pci_write_config_dword(dev, where, val);
                pci_read_config_dword(dev, where, &read);

                if (read == val)
                        return 0;
        }

        i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
                "write=%08x. Read=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val, read);

        return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * Note that the error will only happen after a write operation
 * on memory that matches the condition. If REPEAT_EN is not enabled in the
 * inject mask, it will produce just one error. Otherwise, it will repeat
 * until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check if the MC is using the
 *    three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
                                       const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;
        u64 mask = 0;
        int  rc;
        unsigned long enable;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        rc = strict_strtoul(data, 10, &enable);
        if (rc < 0)
                return 0;

        if (enable) {
                pvt->inject.enable = 1;
        } else {
                disable_inject(mci);
                return count;
        }

        /* Sets pvt->inject.dimm mask */
        if (pvt->inject.dimm < 0)
                mask |= 1LL << 41;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.dimm & 0x3LL) << 35;
                else
                        mask |= (pvt->inject.dimm & 0x1LL) << 36;
        }

        /* Sets pvt->inject.rank mask */
        if (pvt->inject.rank < 0)
                mask |= 1LL << 40;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.rank & 0x1LL) << 34;
                else
                        mask |= (pvt->inject.rank & 0x3LL) << 34;
        }

        /* Sets pvt->inject.bank mask (5-bit field, limit 32) */
        if (pvt->inject.bank < 0)
                mask |= 1LL << 39;
        else
                mask |= (pvt->inject.bank & 0x1fLL) << 30;

        /* Sets pvt->inject.page mask */
        if (pvt->inject.page < 0)
                mask |= 1LL << 38;
        else
                mask |= (pvt->inject.page & 0xffff) << 14;

        /* Sets pvt->inject.column mask */
        if (pvt->inject.col < 0)
                mask |= 1LL << 37;
        else
                mask |= (pvt->inject.col & 0x3fff);
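
        /*
         * Worked example: with every criterion left at "any" (-1), the
         * "ignore" bits 37-41 are all set and the match mask becomes
         * mask = 0x3e000000000, i.e. match any address.
         */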

        /*
         * bit    0: REPEAT_EN
         * bits 1-2: MASK_HALF_CACHELINE
         * bit    3: INJECT_ECC
         * bit    4: INJECT_ADDR_PARITY
         */

        injectmask = (pvt->inject.type & 1) |
                     (pvt->inject.section & 0x3) << 1 |
                     (pvt->inject.type & 0x6) << (3 - 1);
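
        /*
         * Worked example of the encoding above: inject.type = 3
         * (repeat + ECC) and inject.section = 1 (lower half cacheline)
         * yield injectmask = 1 | (1 << 1) | (2 << 2) = 0x0b.
         */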

        /* Unlock writes to registers - this register is write only */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, MC_CFG_UNLOCK);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH, mask);
        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, injectmask);

        /*
         * This is something undocumented, based on my tests.
         * Without writing 8 to this register, errors aren't injected. Not sure
         * why.
         */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 8);

        debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
                " inject 0x%08x\n",
                mask, pvt->inject.eccmask, injectmask);


        return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
                                        char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, &injectmask);

        debugf0("Inject error read: 0x%08x\n", injectmask);

        if (injectmask & 0x0c)
                pvt->inject.enable = 1;

        return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}

#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
        ATTR_COUNTER(0),
        ATTR_COUNTER(1),
        ATTR_COUNTER(2),
        { .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};
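
/*
 * Hedged sketch of the resulting sysfs layout: assuming an instance
 * named mc0, the counters above appear as
 * /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm[0-2], and
 * report "data unavailable" on registered-DIMM (RDIMM) setups.
 */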

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { }     /* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        }, {
                .grp = &i7core_udimm_counters,
        },
        { }     /* End of list */
};

/****************************************************************************
        Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *      i7core_put_devices      'put' all the devices that we have
 *                              reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
        int i;

        debugf0(__FILE__ ": %s()\n", __func__);
        for (i = 0; i < i7core_dev->n_devs; i++) {
                struct pci_dev *pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;
                debugf0("Removing dev %02x:%02x.%d\n",
                        pdev->bus->number,
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
                pci_dev_put(pdev);
        }
}

static void i7core_put_all_devices(void)
{
        struct i7core_dev *i7core_dev, *tmp;

        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
                i7core_put_devices(i7core_dev);
                free_i7core_dev(i7core_dev);
        }
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
        struct pci_dev *pdev = NULL;
        int i;

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
         * aren't announced by ACPI, so we need to use a legacy scan probing
         * to detect them.
         */
        while (table && table->descr) {
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
                if (unlikely(!pdev)) {
                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
                                pcibios_scan_specific_bus(255-i);
                }
                pci_dev_put(pdev);
                table++;
        }
}

static unsigned i7core_pci_lastbus(void)
{
        int last_bus = 0, bus;
        struct pci_bus *b = NULL;

        while ((b = pci_find_next_bus(b)) != NULL) {
                bus = b->number;
                debugf0("Found bus %d\n", bus);
                if (bus > last_bus)
                        last_bus = bus;
        }

        debugf0("Last bus %d\n", last_bus);

        return last_bus;
}

/*
 *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
 *                      device/functions we want to reference for this driver.
 *
 *                      Need to 'get' the memory controller, channel and
 *                      non-core device functions.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core device
         * is at id 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure.
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        socket = last_bus - bus;
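
        /*
         * Example: the non-core buses start at bus 255 (see the
         * MAX_SOCKET_BUSES comment above), so with last_bus == 255 the
         * devices on bus 255 belong to socket 0 and those on bus 254 to
         * socket 1.
         */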

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                i7core_dev = alloc_i7core_dev(socket, table);
                if (!i7core_dev) {
                        pci_dev_put(pdev);
                        return -ENOMEM;
                }
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        i7core_dev->pdev[devno] = pdev;

        /* Sanity check */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                socket, bus, dev_descr->dev,
                dev_descr->func,
                PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        /*
         * As stated in drivers/pci/search.c, the reference count for
         * @from is always decremented if it is not %NULL. So, since we
         * walk the list up to NULL and need to keep every device we found,
         * we do an extra 'get' on each of them here.
         */
        pci_dev_get(pdev);

        *prev = pdev;

        return 0;
}
1345
1346 static int i7core_get_all_devices(void)
1347 {
1348         int i, rc, last_bus;
1349         struct pci_dev *pdev = NULL;
1350         const struct pci_id_table *table = pci_dev_table;
1351
1352         last_bus = i7core_pci_lastbus();
1353
1354         while (table && table->descr) {
1355                 for (i = 0; i < table->n_devs; i++) {
1356                         pdev = NULL;
1357                         do {
1358                                 rc = i7core_get_onedevice(&pdev, table, i,
1359                                                           last_bus);
1360                                 if (rc < 0) {
1361                                         if (i == 0) {
1362                                                 i = table->n_devs;
1363                                                 break;
1364                                         }
1365                                         i7core_put_all_devices();
1366                                         return -ENODEV;
1367                                 }
1368                         } while (pdev);
1369                 }
1370                 table++;
1371         }
1372
1373         return 0;
1374 }
1375
1376 static int mci_bind_devs(struct mem_ctl_info *mci,
1377                          struct i7core_dev *i7core_dev)
1378 {
1379         struct i7core_pvt *pvt = mci->pvt_info;
1380         struct pci_dev *pdev;
1381         int i, func, slot;
1382         char *family;
1383
1384         pvt->is_registered = false;
1385         pvt->enable_scrub  = false;
1386         for (i = 0; i < i7core_dev->n_devs; i++) {
1387                 pdev = i7core_dev->pdev[i];
1388                 if (!pdev)
1389                         continue;
1390
1391                 func = PCI_FUNC(pdev->devfn);
1392                 slot = PCI_SLOT(pdev->devfn);
1393                 if (slot == 3) {
1394                         if (unlikely(func > MAX_MCR_FUNC))
1395                                 goto error;
1396                         pvt->pci_mcr[func] = pdev;
1397                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1398                         if (unlikely(func > MAX_CHAN_FUNC))
1399                                 goto error;
1400                         pvt->pci_ch[slot - 4][func] = pdev;
1401                 } else if (!slot && !func) {
1402                         pvt->pci_noncore = pdev;
1403
1404                         /* Detect the processor family */
1405                         switch (pdev->device) {
1406                         case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1407                                 family = "Xeon 35xx/ i7core";
1408                                 pvt->enable_scrub = false;
1409                                 break;
1410                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1411                                 family = "i7-800/i5-700";
1412                                 pvt->enable_scrub = false;
1413                                 break;
1414                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1415                                 family = "Xeon 34xx";
1416                                 pvt->enable_scrub = false;
1417                                 break;
1418                         case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1419                                 family = "Xeon 55xx";
1420                                 pvt->enable_scrub = true;
1421                                 break;
1422                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1423                                 family = "Xeon 56xx / i7-900";
1424                                 pvt->enable_scrub = true;
1425                                 break;
1426                         default:
1427                                 family = "unknown";
1428                                 pvt->enable_scrub = false;
1429                         }
1430                         debugf0("Detected processor type %s\n", family);
1431                 } else
1432                         goto error;
1433
1434                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1435                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1436                         pdev, i7core_dev->socket);
1437
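                /*
                 * Device 3 function 2 exposes the registered-DIMM error
                 * counters; treat its presence as the hint that the
                 * system is populated with RDIMMs.
                 */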
1438                 if (PCI_SLOT(pdev->devfn) == 3 &&
1439                         PCI_FUNC(pdev->devfn) == 2)
1440                         pvt->is_registered = true;
1441         }
1442
1443         return 0;
1444
1445 error:
1446         i7core_printk(KERN_ERR, "Device %d, function %d "
1447                       "is out of the expected range\n",
1448                       slot, func);
1449         return -EINVAL;
1450 }
1451
1452 /****************************************************************************
1453                         Error check routines
1454  ****************************************************************************/
1455 static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
1456                                       const int chan,
1457                                       const int dimm,
1458                                       const int add)
1459 {
1460         int i;
1461
1462         for (i = 0; i < add; i++) {
1463                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
1464                                      chan, dimm, -1, "error", "", NULL);
1465         }
1466 }
1467
1468 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1469                                          const int chan,
1470                                          const int new0,
1471                                          const int new1,
1472                                          const int new2)
1473 {
1474         struct i7core_pvt *pvt = mci->pvt_info;
1475         int add0 = 0, add1 = 0, add2 = 0;
1476         /* Update the CE counters, unless this is the first time here */
1477         if (pvt->ce_count_available) {
1478                 /* Updates CE counters */
1479
1480                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1481                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1482                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1483
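                /*
                 * The hardware CE counters are only 15 bits wide and wrap
                 * around; a negative delta indicates a wrap, so compensate
                 * by adding back 0x7fff.
                 */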
1484                 if (add2 < 0)
1485                         add2 += 0x7fff;
1486                 pvt->rdimm_ce_count[chan][2] += add2;
1487
1488                 if (add1 < 0)
1489                         add1 += 0x7fff;
1490                 pvt->rdimm_ce_count[chan][1] += add1;
1491
1492                 if (add0 < 0)
1493                         add0 += 0x7fff;
1494                 pvt->rdimm_ce_count[chan][0] += add0;
1495         } else
1496                 pvt->ce_count_available = 1;
1497
1498         /* Store the new values */
1499         pvt->rdimm_last_ce_count[chan][2] = new2;
1500         pvt->rdimm_last_ce_count[chan][1] = new1;
1501         pvt->rdimm_last_ce_count[chan][0] = new0;
1502
1503         /* Update the EDAC core counters */
1504         if (add0 != 0)
1505                 i7core_rdimm_update_errcount(mci, chan, 0, add0);
1506         if (add1 != 0)
1507                 i7core_rdimm_update_errcount(mci, chan, 1, add1);
1508         if (add2 != 0)
1509                 i7core_rdimm_update_errcount(mci, chan, 2, add2);
1510
1511 }
1512
1513 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1514 {
1515         struct i7core_pvt *pvt = mci->pvt_info;
1516         u32 rcv[3][2];
1517         int i, new0, new1, new2;
1518
1519         /* Read DEV 3 FUNC 2 MC_COR_ECC_CNT registers directly */
1520         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1521                               &rcv[0][0]);
1522         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1523                               &rcv[0][1]);
1524         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1525                               &rcv[1][0]);
1526         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1527                               &rcv[1][1]);
1528         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1529                               &rcv[2][0]);
1530         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1531                               &rcv[2][1]);
1532         for (i = 0; i < 3; i++) {
1533                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1534                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1535                 /* if the channel has 3 DIMMs */
1536                 if (pvt->channel[i].dimms > 2) {
1537                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1538                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1539                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1540                 } else {
1541                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1542                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1543                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1544                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1545                         new2 = 0;
1546                 }
1547
1548                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1549         }
1550 }
1551
1552 /* This function is based on the device 3 function 4 registers as described in:
1553  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1554  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1555  * also available at:
1556  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1557  */
1558 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1559 {
1560         struct i7core_pvt *pvt = mci->pvt_info;
1561         u32 rcv1, rcv0;
1562         int new0, new1, new2;
1563
1564         if (!pvt->pci_mcr[4]) {
1565                 debugf0("%s MCR registers not found\n", __func__);
1566                 return;
1567         }
1568
1569         /* Corrected test errors */
1570         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1571         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1572
1573         /* Store the new values */
1574         new2 = DIMM2_COR_ERR(rcv1);
1575         new1 = DIMM1_COR_ERR(rcv0);
1576         new0 = DIMM0_COR_ERR(rcv0);
1577
1578         /* Update the CE counters, unless this is the first time here */
1579         if (pvt->ce_count_available) {
1580                 /* Updates CE counters */
1581                 int add0, add1, add2;
1582
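                /* As above: 15-bit hardware counters, so compensate for wraparound */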
1583                 add2 = new2 - pvt->udimm_last_ce_count[2];
1584                 add1 = new1 - pvt->udimm_last_ce_count[1];
1585                 add0 = new0 - pvt->udimm_last_ce_count[0];
1586
1587                 if (add2 < 0)
1588                         add2 += 0x7fff;
1589                 pvt->udimm_ce_count[2] += add2;
1590
1591                 if (add1 < 0)
1592                         add1 += 0x7fff;
1593                 pvt->udimm_ce_count[1] += add1;
1594
1595                 if (add0 < 0)
1596                         add0 += 0x7fff;
1597                 pvt->udimm_ce_count[0] += add0;
1598
1599                 if (add0 | add1 | add2)
1600                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1601                                       "dimm0: +%d, dimm1: +%d, dimm2: +%d\n",
1602                                       add0, add1, add2);
1603         } else
1604                 pvt->ce_count_available = 1;
1605
1606         /* Store the new values */
1607         pvt->udimm_last_ce_count[2] = new2;
1608         pvt->udimm_last_ce_count[1] = new1;
1609         pvt->udimm_last_ce_count[0] = new0;
1610 }
1611
1612 /*
1613  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1614  * IA-32 Architectures Software Developer's Manual Volume 3B, Nehalem is
1615  * defined as family 0x06, model 0x1a.
1616  *
1617  * The MCA registers used here are the following ones:
1618  *     struct mce field MCA Register
1619  *     m->status        MSR_IA32_MC8_STATUS
1620  *     m->addr          MSR_IA32_MC8_ADDR
1621  *     m->misc          MSR_IA32_MC8_MISC
1622  * On Nehalem, the error information is encoded in the .status and .misc
1623  * fields.
1624  */
1625 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1626                                     const struct mce *m)
1627 {
1628         struct i7core_pvt *pvt = mci->pvt_info;
1629         char *type, *optype, *err, msg[80];
1630         enum hw_event_mc_err_type tp_event;
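        /*
         * Field layout, matching the extractions below: status[24:16] is
         * the model-specific error code, status[6:4] the operation type,
         * status[52:38] the corrected error count; misc[17:16] selects
         * the DIMM, misc[19:18] the channel, and misc[63:32] carries the
         * syndrome.
         */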
1631         unsigned long error = m->status & 0x1ff0000l;
1632         bool uncorrected_error = m->mcgstatus & 1ll << 61;
1633         bool ripv = m->mcgstatus & 1;
1634         u32 optypenum = (m->status >> 4) & 0x07;
1635         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1636         u32 dimm = (m->misc >> 16) & 0x3;
1637         u32 channel = (m->misc >> 18) & 0x3;
1638         u32 syndrome = m->misc >> 32;
1639         u32 errnum = find_first_bit(&error, 32);
1640
1641         if (uncorrected_error) {
1642                 if (ripv) {
1643                         type = "FATAL";
1644                         tp_event = HW_EVENT_ERR_FATAL;
1645                 } else {
1646                         type = "NON_FATAL";
1647                         tp_event = HW_EVENT_ERR_UNCORRECTED;
1648                 }
1649         } else {
1650                 type = "CORRECTED";
1651                 tp_event = HW_EVENT_ERR_CORRECTED;
1652         }
1653
1654         switch (optypenum) {
1655         case 0:
1656                 optype = "generic undef request";
1657                 break;
1658         case 1:
1659                 optype = "read error";
1660                 break;
1661         case 2:
1662                 optype = "write error";
1663                 break;
1664         case 3:
1665                 optype = "addr/cmd error";
1666                 break;
1667         case 4:
1668                 optype = "scrubbing error";
1669                 break;
1670         default:
1671                 optype = "reserved";
1672                 break;
1673         }
1674
1675         switch (errnum) {
1676         case 16:
1677                 err = "read ECC error";
1678                 break;
1679         case 17:
1680                 err = "RAS ECC error";
1681                 break;
1682         case 18:
1683                 err = "write parity error";
1684                 break;
1685         case 19:
1686                 err = "redundancy loss";
1687                 break;
1688         case 20:
1689                 err = "reserved";
1690                 break;
1691         case 21:
1692                 err = "memory range error";
1693                 break;
1694         case 22:
1695                 err = "RTID out of range";
1696                 break;
1697         case 23:
1698                 err = "address parity error";
1699                 break;
1700         case 24:
1701                 err = "byte enable parity error";
1702                 break;
1703         default:
1704                 err = "unknown";
1705         }
1706
1707         snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype);
1708
1709         /*
1710          * Call the helper to output message
1711          * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1712          * only one event
1713          */
1714         if (uncorrected_error || !pvt->is_registered)
1715                 edac_mc_handle_error(tp_event, mci,
1716                                      m->addr >> PAGE_SHIFT,
1717                                      m->addr & ~PAGE_MASK,
1718                                      syndrome,
1719                                      channel, dimm, -1,
1720                                      err, msg, m);
1721 }
1722
1723 /*
1724  *      i7core_check_error      Retrieve and process errors reported by the
1725  *                              hardware. Called by the Core module.
1726  */
1727 static void i7core_check_error(struct mem_ctl_info *mci)
1728 {
1729         struct i7core_pvt *pvt = mci->pvt_info;
1730         int i;
1731         unsigned count = 0;
1732         struct mce *m;
1733
1734         /*
1735          * MCE first step: Copy all mce errors into a temporary buffer
1736          * We use a double buffering here, to reduce the risk of
1737          * losing an error.
1738          */
1739         smp_rmb();
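        /* Number of queued entries: (mce_out - mce_in) modulo the ring size */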
1740         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1741                 % MCE_LOG_LEN;
1742         if (!count)
1743                 goto check_ce_error;
1744
1745         m = pvt->mce_outentry;
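        /*
         * The ring may wrap around: copy the tail chunk first, then wrap
         * back to index 0 for the remainder.
         */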
1746         if (pvt->mce_in + count > MCE_LOG_LEN) {
1747                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1748
1749                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1750                 smp_wmb();
1751                 pvt->mce_in = 0;
1752                 count -= l;
1753                 m += l;
1754         }
1755         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1756         smp_wmb();
1757         pvt->mce_in += count;
1758
1759         smp_rmb();
1760         if (pvt->mce_overrun) {
1761                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1762                               pvt->mce_overrun);
1763                 smp_wmb();
1764                 pvt->mce_overrun = 0;
1765         }
1766
1767         /*
1768          * MCE second step: parse errors and display
1769          */
1770         for (i = 0; i < count; i++)
1771                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1772
1773         /*
1774          * Now, let's increment CE error counts
1775          */
1776 check_ce_error:
1777         if (!pvt->is_registered)
1778                 i7core_udimm_check_mc_ecc_err(mci);
1779         else
1780                 i7core_rdimm_check_mc_ecc_err(mci);
1781 }
1782
1783 /*
1784  * i7core_mce_check_error       Replicates the mcelog routine to get errors.
1785  *                              This routine simply queues mcelog errors and
1786  *                              returns; the errors themselves are handled
1787  *                              later by i7core_check_error.
1788  * WARNING: As this routine should be called at NMI time, extra care should
1789  * be taken to avoid deadlocks, and to be as fast as possible.
1790  */
1791 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1792                                   void *data)
1793 {
1794         struct mce *mce = (struct mce *)data;
1795         struct i7core_dev *i7_dev;
1796         struct mem_ctl_info *mci;
1797         struct i7core_pvt *pvt;
1798
1799         i7_dev = get_i7core_dev(mce->socketid);
1800         if (!i7_dev)
1801                 return NOTIFY_BAD;
1802
1803         mci = i7_dev->mci;
1804         pvt = mci->pvt_info;
1805
1806         /*
1807          * Just let mcelog handle it if the error is
1808          * outside the memory controller
1809          */
1810         if (((mce->status & 0xffff) >> 7) != 1)
1811                 return NOTIFY_DONE;
1812
1813         /* Bank 8 registers are the only ones that we know how to handle */
1814         if (mce->bank != 8)
1815                 return NOTIFY_DONE;
1816
1817 #ifdef CONFIG_SMP
1818         /* Only handle if it is the right mc controller */
1819         if (mce->socketid != pvt->i7core_dev->socket)
1820                 return NOTIFY_DONE;
1821 #endif
1822
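        /*
         * Lockless producer side of the ring buffer: we are in NMI
         * context, so no locks may be taken. If advancing mce_out would
         * catch up with mce_in the ring is full, so count an overrun
         * instead of overwriting.
         */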
1823         smp_rmb();
1824         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1825                 smp_wmb();
1826                 pvt->mce_overrun++;
1827                 return NOTIFY_DONE;
1828         }
1829
1830         /* Copy the memory error into the ring buffer */
1831         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1832         smp_wmb();
1833         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1834
1835         /* Handle fatal errors immediately */
1836         if (mce->mcgstatus & 1)
1837                 i7core_check_error(mci);
1838
1839         /* Advise mcelog that the errors were handled */
1840         return NOTIFY_STOP;
1841 }
1842
1843 static struct notifier_block i7_mce_dec = {
1844         .notifier_call  = i7core_mce_check_error,
1845 };
1846
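/*
 * Layout of an SMBIOS/DMI Type 17 (Memory Device) entry, used below to
 * read the configured memory clock speed from the DMI tables.
 */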
1847 struct memdev_dmi_entry {
1848         u8 type;
1849         u8 length;
1850         u16 handle;
1851         u16 phys_mem_array_handle;
1852         u16 mem_err_info_handle;
1853         u16 total_width;
1854         u16 data_width;
1855         u16 size;
1856         u8 form;
1857         u8 device_set;
1858         u8 device_locator;
1859         u8 bank_locator;
1860         u8 memory_type;
1861         u16 type_detail;
1862         u16 speed;
1863         u8 manufacturer;
1864         u8 serial_number;
1865         u8 asset_tag;
1866         u8 part_number;
1867         u8 attributes;
1868         u32 extended_size;
1869         u16 conf_mem_clk_speed;
1870 } __attribute__((__packed__));
1871
1872
1873 /*
1874  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1875  * memory devices show the same speed, and if they don't then consider
1876  * all speeds to be invalid.
1877  */
1878 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1879 {
1880         int *dclk_freq = _dclk_freq;
1881         u16 dmi_mem_clk_speed;
1882
1883         if (*dclk_freq == -1)
1884                 return;
1885
1886         if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1887                 struct memdev_dmi_entry *memdev_dmi_entry =
1888                         (struct memdev_dmi_entry *)dh;
1889                 unsigned long conf_mem_clk_speed_offset =
1890                         (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1891                         (unsigned long)&memdev_dmi_entry->type;
1892                 unsigned long speed_offset =
1893                         (unsigned long)&memdev_dmi_entry->speed -
1894                         (unsigned long)&memdev_dmi_entry->type;
1895
1896                 /* Check that a DIMM is present */
1897                 if (memdev_dmi_entry->size == 0)
1898                         return;
1899
1900                 /*
1901                  * Pick the configured speed if present, else the DIMM speed;
1902                  * older SMBIOS entries are shorter, hence the length checks.
1903                  */
1904                 if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1905                         dmi_mem_clk_speed =
1906                                 memdev_dmi_entry->conf_mem_clk_speed;
1907                 } else if (memdev_dmi_entry->length > speed_offset) {
1908                         dmi_mem_clk_speed = memdev_dmi_entry->speed;
1909                 } else {
1910                         *dclk_freq = -1;
1911                         return;
1912                 }
1913
1914                 if (*dclk_freq == 0) {
1915                         /* First pass, speed was 0 */
1916                         if (dmi_mem_clk_speed > 0) {
1917                                 /* Set speed if a valid speed is read */
1918                                 *dclk_freq = dmi_mem_clk_speed;
1919                         } else {
1920                                 /* Otherwise we don't have a valid speed */
1921                                 *dclk_freq = -1;
1922                         }
1923                 } else if (*dclk_freq > 0 &&
1924                            *dclk_freq != dmi_mem_clk_speed) {
1925                         /*
1926                          * If we have a speed, check that all DIMMS are the same
1927                          * speed, otherwise set the speed as invalid.
1928                          */
1929                         *dclk_freq = -1;
1930                 }
1931         }
1932 }
1933
1934 /*
1935  * The default DCLK frequency is used as a fallback if we
1936  * fail to find anything reliable in the DMI. The value
1937  * is taken straight from the datasheet.
1938  */
1939 #define DEFAULT_DCLK_FREQ 800
1940
1941 static int get_dclk_freq(void)
1942 {
1943         int dclk_freq = 0;
1944
1945         dmi_walk(decode_dclk, (void *)&dclk_freq);
1946
1947         if (dclk_freq < 1)
1948                 return DEFAULT_DCLK_FREQ;
1949
1950         return dclk_freq;
1951 }
1952
1953 /*
1954  * set_sdram_scrub_rate         This routine sets the byte/sec bandwidth scrub
1955  *                              rate in hardware, according to the SCRUBINTERVAL
1956  *                              formula found in the datasheet.
1957  */
1958 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1959 {
1960         struct i7core_pvt *pvt = mci->pvt_info;
1961         struct pci_dev *pdev;
1962         u32 dw_scrub;
1963         u32 dw_ssr;
1964
1965         /* Get data from the MC register, function 2 */
1966         pdev = pvt->pci_mcr[2];
1967         if (!pdev)
1968                 return -ENODEV;
1969
1970         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1971
1972         if (new_bw == 0) {
1973                 /* Prepare to disable patrol scrub */
1974                 dw_scrub &= ~STARTSCRUB;
1975                 /* Stop the patrol scrub engine */
1976                 write_and_test(pdev, MC_SCRUB_CONTROL,
1977                                dw_scrub & ~SCRUBINTERVAL_MASK);
1978
1979                 /* Get current status of scrub rate and set bit to disable */
1980                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1981                 dw_ssr &= ~SSR_MODE_MASK;
1982                 dw_ssr |= SSR_MODE_DISABLE;
1983         } else {
1984                 const int cache_line_size = 64;
1985                 const u32 freq_dclk_mhz = pvt->dclk_freq;
1986                 unsigned long long scrub_interval;
1987                 /*
1988                  * Translate the desired scrub rate to a register value and
1989                  * program the corresponding register value.
1990                  */
1991                 scrub_interval = (unsigned long long)freq_dclk_mhz *
1992                         cache_line_size * 1000000;
1993                 do_div(scrub_interval, new_bw);
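                /*
                 * Worked example, assuming an 800 MHz DCLK: requesting
                 * new_bw = 5,120,000 bytes/s yields
                 * 800 * 64 * 1,000,000 / 5,120,000 = 10,000 DCLKs between
                 * scrubbed cache lines.
                 */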
1994
1995                 if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
1996                         return -EINVAL;
1997
1998                 dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
1999
2000                 /* Start the patrol scrub engine */
2001                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2002                                        STARTSCRUB | dw_scrub);
2003
2004                 /* Get current status of scrub rate and set bit to enable */
2005                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2006                 dw_ssr &= ~SSR_MODE_MASK;
2007                 dw_ssr |= SSR_MODE_ENABLE;
2008         }
2009         /* Disable or enable scrubbing */
2010         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2011
2012         return new_bw;
2013 }
2014
2015 /*
2016  * get_sdram_scrub_rate         This routine converts the current scrub rate
2017  *                              value into a byte/sec bandwidth, according to
2018  *                              the SCRUBINTERVAL formula found in the datasheet.
2019  */
2020 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2021 {
2022         struct i7core_pvt *pvt = mci->pvt_info;
2023         struct pci_dev *pdev;
2024         const u32 cache_line_size = 64;
2025         const u32 freq_dclk_mhz = pvt->dclk_freq;
2026         unsigned long long scrub_rate;
2027         u32 scrubval;
2028
2029         /* Get data from the MC register, function 2 */
2030         pdev = pvt->pci_mcr[2];
2031         if (!pdev)
2032                 return -ENODEV;
2033
2034         /* Get current scrub control data */
2035         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2036
2037         /* Mask the highest 8 bits to 0 */
2038         scrubval &= SCRUBINTERVAL_MASK;
2039         if (!scrubval)
2040                 return 0;
2041
2042         /* Invert the SCRUBINTERVAL formula to get a byte/sec bandwidth */
2043         scrub_rate = (unsigned long long)freq_dclk_mhz *
2044                 1000000 * cache_line_size;
2045         do_div(scrub_rate, scrubval);
2046         return (int)scrub_rate;
2047 }
2048
2049 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2050 {
2051         struct i7core_pvt *pvt = mci->pvt_info;
2052         u32 pci_lock;
2053
2054         /* Unlock writes to pci registers */
2055         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2056         pci_lock &= ~0x3;
2057         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2058                                pci_lock | MC_CFG_UNLOCK);
2059
2060         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2061         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2062 }
2063
2064 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2065 {
2066         struct i7core_pvt *pvt = mci->pvt_info;
2067         u32 pci_lock;
2068
2069         /* Lock writes to pci registers */
2070         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2071         pci_lock &= ~0x3;
2072         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2073                                pci_lock | MC_CFG_LOCK);
2074 }
2075
2076 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2077 {
2078         pvt->i7core_pci = edac_pci_create_generic_ctl(
2079                                                 &pvt->i7core_dev->pdev[0]->dev,
2080                                                 EDAC_MOD_STR);
2081         if (unlikely(!pvt->i7core_pci))
2082                 i7core_printk(KERN_WARNING,
2083                               "Unable to setup PCI error report via EDAC\n");
2084 }
2085
2086 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2087 {
2088         if (likely(pvt->i7core_pci))
2089                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2090         else
2091                 i7core_printk(KERN_ERR,
2092                                 "Couldn't find mem_ctl_info for socket %d\n",
2093                                 pvt->i7core_dev->socket);
2094         pvt->i7core_pci = NULL;
2095 }
2096
2097 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2098 {
2099         struct mem_ctl_info *mci = i7core_dev->mci;
2100         struct i7core_pvt *pvt;
2101
2102         if (unlikely(!mci || !mci->pvt_info)) {
2103                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2104                         __func__, &i7core_dev->pdev[0]->dev);
2105
2106                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2107                 return;
2108         }
2109
2110         pvt = mci->pvt_info;
2111
2112         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2113                 __func__, mci, &i7core_dev->pdev[0]->dev);
2114
2115         /* Disable scrubrate setting */
2116         if (pvt->enable_scrub)
2117                 disable_sdram_scrub_setting(mci);
2118
2119         mce_unregister_decode_chain(&i7_mce_dec);
2120
2121         /* Disable EDAC polling */
2122         i7core_pci_ctl_release(pvt);
2123
2124         /* Remove MC sysfs nodes */
2125         edac_mc_del_mc(mci->dev);
2126
2127         debugf1("%s: free mci struct\n", mci->ctl_name);
2128         kfree(mci->ctl_name);
2129         edac_mc_free(mci);
2130         i7core_dev->mci = NULL;
2131 }
2132
2133 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2134 {
2135         struct mem_ctl_info *mci;
2136         struct i7core_pvt *pvt;
2137         int rc;
2138         struct edac_mc_layer layers[2];
2139
2140         /* allocate a new MC control structure */
2141
2142         layers[0].type = EDAC_MC_LAYER_CHANNEL;
2143         layers[0].size = NUM_CHANS;
2144         layers[0].is_virt_csrow = false;
2145         layers[1].type = EDAC_MC_LAYER_SLOT;
2146         layers[1].size = MAX_DIMMS;
2147         layers[1].is_virt_csrow = true;
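        /*
         * Two-level layout: channels on top, DIMM slots below; the slot
         * layer doubles as the virtual csrow for EDAC's legacy sysfs
         * interface.
         */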
2148         mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2149                             sizeof(*pvt));
2150         if (unlikely(!mci))
2151                 return -ENOMEM;
2152
2153         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2154                 __func__, mci, &i7core_dev->pdev[0]->dev);
2155
2156         pvt = mci->pvt_info;
2157         memset(pvt, 0, sizeof(*pvt));
2158
2159         /* Associates i7core_dev and mci for future usage */
2160         pvt->i7core_dev = i7core_dev;
2161         i7core_dev->mci = mci;
2162
2163         /*
2164          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2165          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2166          * memory channels
2167          */
2168         mci->mtype_cap = MEM_FLAG_DDR3;
2169         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2170         mci->edac_cap = EDAC_FLAG_NONE;
2171         mci->mod_name = "i7core_edac.c";
2172         mci->mod_ver = I7CORE_REVISION;
2173         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2174                                   i7core_dev->socket);
2175         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2176         mci->ctl_page_to_phys = NULL;
2177
2178         /* Store pci devices at mci for faster access */
2179         rc = mci_bind_devs(mci, i7core_dev);
2180         if (unlikely(rc < 0))
2181                 goto fail0;
2182
2183         if (pvt->is_registered)
2184                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2185         else
2186                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2187
2188         /* Get dimm basic config */
2189         get_dimm_config(mci);
2190         /* record ptr to the generic device */
2191         mci->dev = &i7core_dev->pdev[0]->dev;
2192         /* Set the function pointer to an actual operation function */
2193         mci->edac_check = i7core_check_error;
2194
2195         /* Enable scrubrate setting */
2196         if (pvt->enable_scrub)
2197                 enable_sdram_scrub_setting(mci);
2198
2199         /* add this new MC control structure to EDAC's list of MCs */
2200         if (unlikely(edac_mc_add_mc(mci))) {
2201                 debugf0("MC: " __FILE__
2202                         ": %s(): failed edac_mc_add_mc()\n", __func__);
2203                 /* FIXME: perhaps some code should go here that disables error
2204                  * reporting if we just enabled it
2205                  */
2206
2207                 rc = -EINVAL;
2208                 goto fail0;
2209         }
2210
2211         /* Default error mask is any memory */
2212         pvt->inject.channel = 0;
2213         pvt->inject.dimm = -1;
2214         pvt->inject.rank = -1;
2215         pvt->inject.bank = -1;
2216         pvt->inject.page = -1;
2217         pvt->inject.col = -1;
2218
2219         /* allocating generic PCI control info */
2220         i7core_pci_ctl_create(pvt);
2221
2222         /* DCLK for scrub rate setting */
2223         pvt->dclk_freq = get_dclk_freq();
2224
2225         mce_register_decode_chain(&i7_mce_dec);
2226
2227         return 0;
2228
2229 fail0:
2230         kfree(mci->ctl_name);
2231         edac_mc_free(mci);
2232         i7core_dev->mci = NULL;
2233         return rc;
2234 }
2235
2236 /*
2237  *      i7core_probe    Probe for ONE instance of the device to see if it
2238  *                      is present.
2239  *      return:
2240  *              0 if a device was found
2241  *              < 0 for an error code
2242  */
2243
2244 static int __devinit i7core_probe(struct pci_dev *pdev,
2245                                   const struct pci_device_id *id)
2246 {
2247         int rc, count = 0;
2248         struct i7core_dev *i7core_dev;
2249
2250         /* get the pci devices we want to reserve for our use */
2251         mutex_lock(&i7core_edac_lock);
2252
2253         /*
2254          * All memory controllers are allocated at the first pass.
2255          */
2256         if (unlikely(probed >= 1)) {
2257                 mutex_unlock(&i7core_edac_lock);
2258                 return -ENODEV;
2259         }
2260         probed++;
2261
2262         rc = i7core_get_all_devices();
2263         if (unlikely(rc < 0))
2264                 goto fail0;
2265
2266         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2267                 count++;
2268                 rc = i7core_register_mci(i7core_dev);
2269                 if (unlikely(rc < 0))
2270                         goto fail1;
2271         }
2272
2273         /*
2274          * Nehalem-EX uses a different memory controller. However, as the
2275          * memory controller is not visible on some Nehalem/Nehalem-EP, we
2276          * need to probe indirectly via an X58 PCI device. The same devices
2277          * are found on (some) Nehalem-EX. So, on those machines, the
2278          * probe routine needs to return -ENODEV, as the actual Memory
2279          * Controller registers won't be detected.
2280          */
2281         if (!count) {
2282                 rc = -ENODEV;
2283                 goto fail1;
2284         }
2285
2286         i7core_printk(KERN_INFO,
2287                       "Driver loaded, %d memory controller(s) found.\n",
2288                       count);
2289
2290         mutex_unlock(&i7core_edac_lock);
2291         return 0;
2292
2293 fail1:
2294         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2295                 i7core_unregister_mci(i7core_dev);
2296
2297         i7core_put_all_devices();
2298 fail0:
2299         mutex_unlock(&i7core_edac_lock);
2300         return rc;
2301 }
2302
2303 /*
2304  *      i7core_remove   destructor for one instance of device
2305  *
2306  */
2307 static void __devexit i7core_remove(struct pci_dev *pdev)
2308 {
2309         struct i7core_dev *i7core_dev;
2310
2311         debugf0(__FILE__ ": %s()\n", __func__);
2312
2313         /*
2314          * There is a problem here: the pdev value for removal will be wrong,
2315          * since it points to the X58 register used to detect that the machine
2316          * is a Nehalem or newer design. Due to the way several PCI devices
2317          * are grouped together to provide the MC functionality, we need to
2318          * use a different method for releasing the devices.
2319          */
2320
2321         mutex_lock(&i7core_edac_lock);
2322
2323         if (unlikely(!probed)) {
2324                 mutex_unlock(&i7core_edac_lock);
2325                 return;
2326         }
2327
2328         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2329                 i7core_unregister_mci(i7core_dev);
2330
2331         /* Release PCI resources */
2332         i7core_put_all_devices();
2333
2334         probed--;
2335
2336         mutex_unlock(&i7core_edac_lock);
2337 }
2338
2339 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2340
2341 /*
2342  *      i7core_driver   pci_driver structure for this module
2343  *
2344  */
2345 static struct pci_driver i7core_driver = {
2346         .name     = "i7core_edac",
2347         .probe    = i7core_probe,
2348         .remove   = __devexit_p(i7core_remove),
2349         .id_table = i7core_pci_tbl,
2350 };
2351
2352 /*
2353  *      i7core_init             Module entry function
2354  *                      Try to initialize this module for its devices
2355  */
2356 static int __init i7core_init(void)
2357 {
2358         int pci_rc;
2359
2360         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2361
2362         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2363         opstate_init();
2364
2365         if (use_pci_fixup)
2366                 i7core_xeon_pci_fixup(pci_dev_table);
2367
2368         pci_rc = pci_register_driver(&i7core_driver);
2369
2370         if (pci_rc >= 0)
2371                 return 0;
2372
2373         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2374                       pci_rc);
2375
2376         return pci_rc;
2377 }
2378
2379 /*
2380  *      i7core_exit()   Module exit function
2381  *                      Unregister the driver
2382  */
2383 static void __exit i7core_exit(void)
2384 {
2385         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2386         pci_unregister_driver(&i7core_driver);
2387 }
2388
2389 module_init(i7core_init);
2390 module_exit(i7core_exit);
2391
2392 MODULE_LICENSE("GPL");
2393 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2394 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2395 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2396                    I7CORE_REVISION);
2397
2398 module_param(edac_op_state, int, 0444);
2399 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
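
/*
 * Usage sketch (not part of the driver; assumes the standard EDAC sysfs
 * layout):
 *
 *   modprobe i7core_edac edac_op_state=0
 *   cat /sys/devices/system/edac/mc/mc0/ce_count
 */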