]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/powerpc/platforms/pseries/nvram.c
742735acd8b6409fb2bd68fcc9ba706a2f97944e
[karo-tx-linux.git] / arch / powerpc / platforms / pseries / nvram.c
1 /*
2  *  c 2001 PPC 64 Team, IBM Corp
3  *
4  *      This program is free software; you can redistribute it and/or
5  *      modify it under the terms of the GNU General Public License
6  *      as published by the Free Software Foundation; either version
7  *      2 of the License, or (at your option) any later version.
8  *
9  * /dev/nvram driver for PPC64
10  *
11  * This perhaps should live in drivers/char
12  */
13
14
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/init.h>
18 #include <linux/spinlock.h>
19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h>
21 #include <linux/ctype.h>
22 #include <linux/zlib.h>
23 #include <asm/uaccess.h>
24 #include <asm/nvram.h>
25 #include <asm/rtas.h>
26 #include <asm/prom.h>
27 #include <asm/machdep.h>
28
/* Upper bound on the number of bytes moved by one RTAS fetch/store call */
#define NVRW_CNT 0x20

/*
 * Version number stored in the oops header so that the old and new header
 * formats can be distinguished.  The lnx,oops-log partition is at most
 * 4000 bytes, so a header version greater than 4000 identifies the new
 * header format.
 */
#define OOPS_HDR_VERSION 5000
38
39 static unsigned int nvram_size;
40 static int nvram_fetch, nvram_store;
41 static char nvram_buf[NVRW_CNT];        /* assume this is in the first 4GB */
42 static DEFINE_SPINLOCK(nvram_lock);
43
/*
 * Small header written in front of the data of an OS log partition
 * (see nvram_write_os_partition()).
 */
struct err_log_info {
	int error_type;		/* record type, or the "already logged" flag */
	unsigned int seq_num;	/* sequence number supplied by the writer */
};
48
/* Describes one OS-owned NVRAM partition used for logging */
struct nvram_os_partition {
	/* partition name as stored in NVRAM */
	const char *name;
	/* desired size, in bytes */
	int req_size;
	/* minimum acceptable size (0 means req_size) */
	int min_size;
	/* size of data portion (excluding err_log_info) */
	long size;
	/* offset of data portion of partition; -1 while not set up */
	long index;
};
56
57 static struct nvram_os_partition rtas_log_partition = {
58         .name = "ibm,rtas-log",
59         .req_size = 2079,
60         .min_size = 1055,
61         .index = -1
62 };
63
64 static struct nvram_os_partition oops_log_partition = {
65         .name = "lnx,oops-log",
66         .req_size = 4000,
67         .min_size = 2000,
68         .index = -1
69 };
70
/*
 * NULL-terminated list of the OS partition names this file uses.  It is
 * passed to nvram_remove_partition() when obsolete OS partitions are purged
 * (presumably as the set of names to keep -- confirm against
 * nvram_remove_partition()).
 */
static const char *pseries_nvram_os_partitions[] = {
	"ibm,rtas-log", "lnx,oops-log", NULL
};
76
77 struct oops_log_info {
78         u16 version;
79         u16 report_length;
80         u64 timestamp;
81 } __attribute__((packed));
82
83 static void oops_to_nvram(struct kmsg_dumper *dumper,
84                           enum kmsg_dump_reason reason);
85
86 static struct kmsg_dumper nvram_kmsg_dumper = {
87         .dump = oops_to_nvram
88 };
89
/*
 * Used by clobbering_unread_rtas_event(): how long an RTAS event written to
 * NVRAM is considered "still unread", and when the last one was written.
 */
#define NVRAM_RTAS_READ_TIMEOUT 5		/* seconds */
static unsigned long last_unread_rtas_event;	/* timestamp; 0 = none pending */
93
/*
 * Buffers for capturing and compressing an oops or panic report.
 *
 * big_oops_buf[] holds the uncompressed text being captured.
 *
 * oops_buf[] is what gets written to NVRAM: an oops header followed by the
 * report text.  The header is a u16 version (which tells the old and new
 * header formats apart), a u16 length of the text that follows (compressed,
 * or uncompressed if compression fails) and a u64 timestamp.
 *
 * The header is accessed through a struct oops_log_info pointer aimed at the
 * start of oops_buf; oops_data points at the text right behind it.
 *
 * +- oops_buf
 * |                                   +- oops_data
 * v                                   v
 * +-----------+-----------+-----------+------------------------+
 * | version   | length    | timestamp | text                   |
 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
 * +-----------+-----------+-----------+------------------------+
 * ^
 * +- oops_log_info
 *
 * These buffers are preallocated during init so that no kmalloc is needed
 * while handling an oops/panic.
 */
static size_t big_oops_buf_sz;
static char *big_oops_buf;
static char *oops_buf;
static char *oops_data;
static size_t oops_data_sz;
123
124 /* Compression parameters */
125 #define COMPR_LEVEL 6
126 #define WINDOW_BITS 12
127 #define MEM_LEVEL 4
128 static struct z_stream_s stream;
129
130 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
131 {
132         unsigned int i;
133         unsigned long len;
134         int done;
135         unsigned long flags;
136         char *p = buf;
137
138
139         if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
140                 return -ENODEV;
141
142         if (*index >= nvram_size)
143                 return 0;
144
145         i = *index;
146         if (i + count > nvram_size)
147                 count = nvram_size - i;
148
149         spin_lock_irqsave(&nvram_lock, flags);
150
151         for (; count != 0; count -= len) {
152                 len = count;
153                 if (len > NVRW_CNT)
154                         len = NVRW_CNT;
155                 
156                 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
157                                len) != 0) || len != done) {
158                         spin_unlock_irqrestore(&nvram_lock, flags);
159                         return -EIO;
160                 }
161                 
162                 memcpy(p, nvram_buf, len);
163
164                 p += len;
165                 i += len;
166         }
167
168         spin_unlock_irqrestore(&nvram_lock, flags);
169         
170         *index = i;
171         return p - buf;
172 }
173
174 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
175 {
176         unsigned int i;
177         unsigned long len;
178         int done;
179         unsigned long flags;
180         const char *p = buf;
181
182         if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
183                 return -ENODEV;
184
185         if (*index >= nvram_size)
186                 return 0;
187
188         i = *index;
189         if (i + count > nvram_size)
190                 count = nvram_size - i;
191
192         spin_lock_irqsave(&nvram_lock, flags);
193
194         for (; count != 0; count -= len) {
195                 len = count;
196                 if (len > NVRW_CNT)
197                         len = NVRW_CNT;
198
199                 memcpy(nvram_buf, p, len);
200
201                 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
202                                len) != 0) || len != done) {
203                         spin_unlock_irqrestore(&nvram_lock, flags);
204                         return -EIO;
205                 }
206                 
207                 p += len;
208                 i += len;
209         }
210         spin_unlock_irqrestore(&nvram_lock, flags);
211         
212         *index = i;
213         return p - buf;
214 }
215
216 static ssize_t pSeries_nvram_get_size(void)
217 {
218         return nvram_size ? nvram_size : -ENODEV;
219 }
220
221
222 /* nvram_write_os_partition, nvram_write_error_log
223  *
224  * We need to buffer the error logs into nvram to ensure that we have
225  * the failure information to decode.  If we have a severe error there
226  * is no way to guarantee that the OS or the machine is in a state to
227  * get back to user land and write the error to disk.  For example if
228  * the SCSI device driver causes a Machine Check by writing to a bad
229  * IO address, there is no way of guaranteeing that the device driver
230  * is in any state that is would also be able to write the error data
231  * captured to disk, thus we buffer it in NVRAM for analysis on the
232  * next boot.
233  *
234  * In NVRAM the partition containing the error log buffer will looks like:
235  * Header (in bytes):
236  * +-----------+----------+--------+------------+------------------+
237  * | signature | checksum | length | name       | data             |
238  * |0          |1         |2      3|4         15|16        length-1|
239  * +-----------+----------+--------+------------+------------------+
240  *
241  * The 'data' section would look like (in bytes):
242  * +--------------+------------+-----------------------------------+
243  * | event_logged | sequence # | error log                         |
244  * |0            3|4          7|8                  error_log_size-1|
245  * +--------------+------------+-----------------------------------+
246  *
247  * event_logged: 0 if event has not been logged to syslog, 1 if it has
248  * sequence #: The unique sequence # for each event. (until it wraps)
249  * error log: The error log from event_scan
250  */
251 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
252                 int length, unsigned int err_type, unsigned int error_log_cnt)
253 {
254         int rc;
255         loff_t tmp_index;
256         struct err_log_info info;
257         
258         if (part->index == -1) {
259                 return -ESPIPE;
260         }
261
262         if (length > part->size) {
263                 length = part->size;
264         }
265
266         info.error_type = err_type;
267         info.seq_num = error_log_cnt;
268
269         tmp_index = part->index;
270
271         rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
272         if (rc <= 0) {
273                 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
274                 return rc;
275         }
276
277         rc = ppc_md.nvram_write(buff, length, &tmp_index);
278         if (rc <= 0) {
279                 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
280                 return rc;
281         }
282         
283         return 0;
284 }
285
286 int nvram_write_error_log(char * buff, int length,
287                           unsigned int err_type, unsigned int error_log_cnt)
288 {
289         int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
290                                                 err_type, error_log_cnt);
291         if (!rc)
292                 last_unread_rtas_event = get_seconds();
293         return rc;
294 }
295
296 /* nvram_read_error_log
297  *
298  * Reads nvram for error log for at most 'length'
299  */
300 int nvram_read_error_log(char * buff, int length,
301                          unsigned int * err_type, unsigned int * error_log_cnt)
302 {
303         int rc;
304         loff_t tmp_index;
305         struct err_log_info info;
306         
307         if (rtas_log_partition.index == -1)
308                 return -1;
309
310         if (length > rtas_log_partition.size)
311                 length = rtas_log_partition.size;
312
313         tmp_index = rtas_log_partition.index;
314
315         rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
316         if (rc <= 0) {
317                 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
318                 return rc;
319         }
320
321         rc = ppc_md.nvram_read(buff, length, &tmp_index);
322         if (rc <= 0) {
323                 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
324                 return rc;
325         }
326
327         *error_log_cnt = info.seq_num;
328         *err_type = info.error_type;
329
330         return 0;
331 }
332
333 /* This doesn't actually zero anything, but it sets the event_logged
334  * word to tell that this event is safely in syslog.
335  */
336 int nvram_clear_error_log(void)
337 {
338         loff_t tmp_index;
339         int clear_word = ERR_FLAG_ALREADY_LOGGED;
340         int rc;
341
342         if (rtas_log_partition.index == -1)
343                 return -1;
344
345         tmp_index = rtas_log_partition.index;
346         
347         rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
348         if (rc <= 0) {
349                 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
350                 return rc;
351         }
352         last_unread_rtas_event = 0;
353
354         return 0;
355 }
356
357 /* pseries_nvram_init_os_partition
358  *
359  * This sets up a partition with an "OS" signature.
360  *
361  * The general strategy is the following:
362  * 1.) If a partition with the indicated name already exists...
363  *      - If it's large enough, use it.
364  *      - Otherwise, recycle it and keep going.
365  * 2.) Search for a free partition that is large enough.
366  * 3.) If there's not a free partition large enough, recycle any obsolete
367  * OS partitions and try again.
368  * 4.) Will first try getting a chunk that will satisfy the requested size.
369  * 5.) If a chunk of the requested size cannot be allocated, then try finding
370  * a chunk that will satisfy the minum needed.
371  *
372  * Returns 0 on success, else -1.
373  */
374 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
375                                                                         *part)
376 {
377         loff_t p;
378         int size;
379
380         /* Scan nvram for partitions */
381         nvram_scan_partitions();
382
383         /* Look for ours */
384         p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
385
386         /* Found one but too small, remove it */
387         if (p && size < part->min_size) {
388                 pr_info("nvram: Found too small %s partition,"
389                                         " removing it...\n", part->name);
390                 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
391                 p = 0;
392         }
393
394         /* Create one if we didn't find */
395         if (!p) {
396                 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
397                                         part->req_size, part->min_size);
398                 if (p == -ENOSPC) {
399                         pr_info("nvram: No room to create %s partition, "
400                                 "deleting any obsolete OS partitions...\n",
401                                 part->name);
402                         nvram_remove_partition(NULL, NVRAM_SIG_OS,
403                                                 pseries_nvram_os_partitions);
404                         p = nvram_create_partition(part->name, NVRAM_SIG_OS,
405                                         part->req_size, part->min_size);
406                 }
407         }
408
409         if (p <= 0) {
410                 pr_err("nvram: Failed to find or create %s"
411                        " partition, err %d\n", part->name, (int)p);
412                 return -1;
413         }
414
415         part->index = p;
416         part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
417         
418         return 0;
419 }
420
421 static void __init nvram_init_oops_partition(int rtas_partition_exists)
422 {
423         int rc;
424
425         rc = pseries_nvram_init_os_partition(&oops_log_partition);
426         if (rc != 0) {
427                 if (!rtas_partition_exists)
428                         return;
429                 pr_notice("nvram: Using %s partition to log both"
430                         " RTAS errors and oops/panic reports\n",
431                         rtas_log_partition.name);
432                 memcpy(&oops_log_partition, &rtas_log_partition,
433                                                 sizeof(rtas_log_partition));
434         }
435         oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
436         if (!oops_buf) {
437                 pr_err("nvram: No memory for %s partition\n",
438                                                 oops_log_partition.name);
439                 return;
440         }
441         oops_data = oops_buf + sizeof(struct oops_log_info);
442         oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
443
444         /*
445          * Figure compression (preceded by elimination of each line's <n>
446          * severity prefix) will reduce the oops/panic report to at most
447          * 45% of its original size.
448          */
449         big_oops_buf_sz = (oops_data_sz * 100) / 45;
450         big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
451         if (big_oops_buf) {
452                 stream.workspace = kmalloc(zlib_deflate_workspacesize(
453                                 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
454                 if (!stream.workspace) {
455                         pr_err("nvram: No memory for compression workspace; "
456                                 "skipping compression of %s partition data\n",
457                                 oops_log_partition.name);
458                         kfree(big_oops_buf);
459                         big_oops_buf = NULL;
460                 }
461         } else {
462                 pr_err("No memory for uncompressed %s data; "
463                         "skipping compression\n", oops_log_partition.name);
464                 stream.workspace = NULL;
465         }
466
467         rc = kmsg_dump_register(&nvram_kmsg_dumper);
468         if (rc != 0) {
469                 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
470                 kfree(oops_buf);
471                 kfree(big_oops_buf);
472                 kfree(stream.workspace);
473         }
474 }
475
476 static int __init pseries_nvram_init_log_partitions(void)
477 {
478         int rc;
479
480         rc = pseries_nvram_init_os_partition(&rtas_log_partition);
481         nvram_init_oops_partition(rc == 0);
482         return 0;
483 }
484 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
485
486 int __init pSeries_nvram_init(void)
487 {
488         struct device_node *nvram;
489         const unsigned int *nbytes_p;
490         unsigned int proplen;
491
492         nvram = of_find_node_by_type(NULL, "nvram");
493         if (nvram == NULL)
494                 return -ENODEV;
495
496         nbytes_p = of_get_property(nvram, "#bytes", &proplen);
497         if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
498                 of_node_put(nvram);
499                 return -EIO;
500         }
501
502         nvram_size = *nbytes_p;
503
504         nvram_fetch = rtas_token("nvram-fetch");
505         nvram_store = rtas_token("nvram-store");
506         printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
507         of_node_put(nvram);
508
509         ppc_md.nvram_read       = pSeries_nvram_read;
510         ppc_md.nvram_write      = pSeries_nvram_write;
511         ppc_md.nvram_size       = pSeries_nvram_get_size;
512
513         return 0;
514 }
515
516 /*
517  * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
518  * would logging this oops/panic overwrite an RTAS event that rtas_errd
519  * hasn't had a chance to read and process?  Return 1 if so, else 0.
520  *
521  * We assume that if rtas_errd hasn't read the RTAS event in
522  * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
523  */
524 static int clobbering_unread_rtas_event(void)
525 {
526         return (oops_log_partition.index == rtas_log_partition.index
527                 && last_unread_rtas_event
528                 && get_seconds() - last_unread_rtas_event <=
529                                                 NVRAM_RTAS_READ_TIMEOUT);
530 }
531
532 /* Derived from logfs_compress() */
533 static int nvram_compress(const void *in, void *out, size_t inlen,
534                                                         size_t outlen)
535 {
536         int err, ret;
537
538         ret = -EIO;
539         err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
540                                                 MEM_LEVEL, Z_DEFAULT_STRATEGY);
541         if (err != Z_OK)
542                 goto error;
543
544         stream.next_in = in;
545         stream.avail_in = inlen;
546         stream.total_in = 0;
547         stream.next_out = out;
548         stream.avail_out = outlen;
549         stream.total_out = 0;
550
551         err = zlib_deflate(&stream, Z_FINISH);
552         if (err != Z_STREAM_END)
553                 goto error;
554
555         err = zlib_deflateEnd(&stream);
556         if (err != Z_OK)
557                 goto error;
558
559         if (stream.total_out >= stream.total_in)
560                 goto error;
561
562         ret = stream.total_out;
563 error:
564         return ret;
565 }
566
567 /* Compress the text from big_oops_buf into oops_buf. */
568 static int zip_oops(size_t text_len)
569 {
570         struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
571         int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
572                                                                 oops_data_sz);
573         if (zipped_len < 0) {
574                 pr_err("nvram: compression failed; returned %d\n", zipped_len);
575                 pr_err("nvram: logging uncompressed oops/panic report\n");
576                 return -1;
577         }
578         oops_hdr->version = OOPS_HDR_VERSION;
579         oops_hdr->report_length = (u16) zipped_len;
580         oops_hdr->timestamp = get_seconds();
581         return 0;
582 }
583
584 /*
585  * This is our kmsg_dump callback, called after an oops or panic report
586  * has been written to the printk buffer.  We want to capture as much
587  * of the printk buffer as possible.  First, capture as much as we can
588  * that we think will compress sufficiently to fit in the lnx,oops-log
589  * partition.  If that's too much, go back and capture uncompressed text.
590  */
591 static void oops_to_nvram(struct kmsg_dumper *dumper,
592                           enum kmsg_dump_reason reason)
593 {
594         struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
595         static unsigned int oops_count = 0;
596         static bool panicking = false;
597         static DEFINE_SPINLOCK(lock);
598         unsigned long flags;
599         size_t text_len;
600         unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
601         int rc = -1;
602
603         switch (reason) {
604         case KMSG_DUMP_RESTART:
605         case KMSG_DUMP_HALT:
606         case KMSG_DUMP_POWEROFF:
607                 /* These are almost always orderly shutdowns. */
608                 return;
609         case KMSG_DUMP_OOPS:
610                 break;
611         case KMSG_DUMP_PANIC:
612                 panicking = true;
613                 break;
614         case KMSG_DUMP_EMERG:
615                 if (panicking)
616                         /* Panic report already captured. */
617                         return;
618                 break;
619         default:
620                 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
621                                                 __FUNCTION__, (int) reason);
622                 return;
623         }
624
625         if (clobbering_unread_rtas_event())
626                 return;
627
628         if (!spin_trylock_irqsave(&lock, flags))
629                 return;
630
631         if (big_oops_buf) {
632                 kmsg_dump_get_buffer(dumper, false,
633                                      big_oops_buf, big_oops_buf_sz, &text_len);
634                 rc = zip_oops(text_len);
635         }
636         if (rc != 0) {
637                 kmsg_dump_rewind(dumper);
638                 kmsg_dump_get_buffer(dumper, false,
639                                      oops_data, oops_data_sz, &text_len);
640                 err_type = ERR_TYPE_KERNEL_PANIC;
641                 oops_hdr->version = OOPS_HDR_VERSION;
642                 oops_hdr->report_length = (u16) text_len;
643                 oops_hdr->timestamp = get_seconds();
644         }
645
646         (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
647                 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
648                 ++oops_count);
649
650         spin_unlock_irqrestore(&lock, flags);
651 }