2 * July 20, 2011 Bob Pearson <rpearson at systemfabricworks.com>
3 * added slice by 8 algorithm to the existing conventional and
4 * slice by 4 algorithms.
6 * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
7 * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks!
8 * Code was from the public domain, copyright abandoned. Code was
9 * subsequently included in the kernel, thus was re-licensed under the
12 * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
13 * Same crc32 function was used in 5 other places in the kernel.
14 * I made one version, and deleted the others.
15 * There are various incantations of crc32(). Some use a seed of 0 or ~0.
16 * Some xor at the end with ~0. The generic crc32() function takes
17 * seed as an argument, and doesn't xor at the end. Then individual
18 * users can do whatever they need.
19 * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
20 * fs/jffs2 uses seed 0, doesn't xor with ~0.
21 * fs/partitions/efi.c uses seed ~0, xor's with ~0.
23 * This source code is licensed under the GNU General Public License,
24 * Version 2. See the file COPYING for more details.
26 #include <linux/crc32.h>
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/compiler.h>
30 #include <linux/types.h>
31 #include <linux/init.h>
32 #include <linux/atomic.h>
33 #include "crc32defs.h"
38 # define tole(x) (__force u32) __constant_cpu_to_le32(x)
44 # define tobe(x) (__force u32) __constant_cpu_to_be32(x)
48 #include "crc32table.h"
50 MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
51 MODULE_DESCRIPTION("Ethernet CRC32 calculations");
52 MODULE_LICENSE("GPL");
55 static inline u32 crc32_le_body(u32 crc, u8 const *buf, size_t len)
59 int init_bytes, end_bytes;
65 crc = (__force u32) __cpu_to_le32(crc);
69 p32 = (u32 *)(((uintptr_t)p8 + 7) & ~7);
71 init_bytes = (uintptr_t)p32 - (uintptr_t)p8;
74 words = (len - init_bytes) >> 3;
75 end_bytes = (len - init_bytes) & 7;
78 p32 = (u32 *)(((uintptr_t)p8 + 3) & ~3);
80 init_bytes = (uintptr_t)p32 - (uintptr_t)p8;
83 words = (len - init_bytes) >> 2;
84 end_bytes = (len - init_bytes) & 3;
87 for (i = 0; i < init_bytes; i++) {
88 #ifdef __LITTLE_ENDIAN
90 crc = t0_le[i0] ^ (crc >> 8);
92 i0 = *p8++ ^ (crc >> 24);
93 crc = t0_le[i0] ^ (crc << 8);
97 for (i = 0; i < words; i++) {
98 #ifdef __LITTLE_ENDIAN
99 # if CRC_LE_BITS == 64
100 /* slice by 8 algorithm */
106 crc = t7_le[i3] ^ t6_le[i2] ^ t5_le[i1] ^ t4_le[i0];
113 crc ^= t3_le[i3] ^ t2_le[i2] ^ t1_le[i1] ^ t0_le[i0];
115 /* slice by 4 algorithm */
121 crc = t3_le[i3] ^ t2_le[i2] ^ t1_le[i1] ^ t0_le[i0];
124 # if CRC_LE_BITS == 64
130 crc = t7_le[i3] ^ t6_le[i2] ^ t5_le[i1] ^ t4_le[i0];
137 crc ^= t3_le[i3] ^ t2_le[i2] ^ t1_le[i1] ^ t0_le[i0];
144 crc = t3_le[i3] ^ t2_le[i2] ^ t1_le[i1] ^ t0_le[i0];
151 for (i = 0; i < end_bytes; i++) {
152 #ifdef __LITTLE_ENDIAN
154 crc = t0_le[i0] ^ (crc >> 8);
156 i0 = *p8++ ^ (crc >> 24);
157 crc = t0_le[i0] ^ (crc << 8);
161 return __le32_to_cpu((__force __le32)crc);
166 static inline u32 crc32_be_body(u32 crc, u8 const *buf, size_t len)
170 int init_bytes, end_bytes;
176 crc = (__force u32) __cpu_to_be32(crc);
178 #if CRC_LE_BITS == 64
180 p32 = (u32 *)(((uintptr_t)p8 + 7) & ~7);
182 init_bytes = (uintptr_t)p32 - (uintptr_t)p8;
183 if (init_bytes > len)
185 words = (len - init_bytes) >> 3;
186 end_bytes = (len - init_bytes) & 7;
189 p32 = (u32 *)(((uintptr_t)p8 + 3) & ~3);
191 init_bytes = (uintptr_t)p32 - (uintptr_t)p8;
192 if (init_bytes > len)
194 words = (len - init_bytes) >> 2;
195 end_bytes = (len - init_bytes) & 3;
198 for (i = 0; i < init_bytes; i++) {
199 #ifdef __LITTLE_ENDIAN
201 crc = t0_be[i0] ^ (crc >> 8);
203 i0 = *p8++ ^ (crc >> 24);
204 crc = t0_be[i0] ^ (crc << 8);
208 for (i = 0; i < words; i++) {
209 #ifdef __LITTLE_ENDIAN
210 # if CRC_LE_BITS == 64
211 /* slice by 8 algorithm */
217 crc = t7_be[i3] ^ t6_be[i2] ^ t5_be[i1] ^ t4_be[i0];
224 crc ^= t3_be[i3] ^ t2_be[i2] ^ t1_be[i1] ^ t0_be[i0];
226 /* slice by 4 algorithm */
232 crc = t3_be[i3] ^ t2_be[i2] ^ t1_be[i1] ^ t0_be[i0];
235 # if CRC_LE_BITS == 64
241 crc = t7_be[i3] ^ t6_be[i2] ^ t5_be[i1] ^ t4_be[i0];
248 crc ^= t3_be[i3] ^ t2_be[i2] ^ t1_be[i1] ^ t0_be[i0];
255 crc = t3_be[i3] ^ t2_be[i2] ^ t1_be[i1] ^ t0_be[i0];
262 for (i = 0; i < end_bytes; i++) {
263 #ifdef __LITTLE_ENDIAN
265 crc = t0_be[i0] ^ (crc >> 8);
267 i0 = *p8++ ^ (crc >> 24);
268 crc = t0_be[i0] ^ (crc << 8);
272 return __be32_to_cpu((__force __be32)crc);
277 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
278 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
279 * other uses, or the previous crc32 value if computing incrementally.
280 * @p: pointer to buffer over which CRC is run
281 * @len: length of buffer @p
283 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
289 for (i = 0; i < 8; i++)
290 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
292 # elif CRC_LE_BITS == 2
295 crc = (crc >> 2) ^ t0_le[crc & 0x03];
296 crc = (crc >> 2) ^ t0_le[crc & 0x03];
297 crc = (crc >> 2) ^ t0_le[crc & 0x03];
298 crc = (crc >> 2) ^ t0_le[crc & 0x03];
300 # elif CRC_LE_BITS == 4
303 crc = (crc >> 4) ^ t0_le[crc & 0x0f];
304 crc = (crc >> 4) ^ t0_le[crc & 0x0f];
306 # elif CRC_LE_BITS == 8
309 crc = (crc >> 8) ^ t0_le[crc & 0xff];
312 crc = crc32_le_body(crc, p, len);
316 EXPORT_SYMBOL(crc32_le);
319 * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
320 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
321 * other uses, or the previous crc32 value if computing incrementally.
322 * @p: pointer to buffer over which CRC is run
323 * @len: length of buffer @p
325 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
331 for (i = 0; i < 8; i++)
333 ((crc & 0x80000000) ? CRCPOLY_BE : 0);
335 # elif CRC_BE_BITS == 2
338 crc = (crc << 2) ^ t0_be[crc >> 30];
339 crc = (crc << 2) ^ t0_be[crc >> 30];
340 crc = (crc << 2) ^ t0_be[crc >> 30];
341 crc = (crc << 2) ^ t0_be[crc >> 30];
343 # elif CRC_BE_BITS == 4
346 crc = (crc << 4) ^ t0_be[crc >> 28];
347 crc = (crc << 4) ^ t0_be[crc >> 28];
349 # elif CRC_BE_BITS == 8
352 crc = (crc << 8) ^ t0_be[crc >> 24];
355 crc = crc32_be_body(crc, p, len);
359 EXPORT_SYMBOL(crc32_be);
362 * A brief CRC tutorial.
364 * A CRC is a long-division remainder. You add the CRC to the message,
365 * and the whole thing (message+CRC) is a multiple of the given
366 * CRC polynomial. To check the CRC, you can either check that the
367 * CRC matches the recomputed value, *or* you can check that the
368 * remainder computed on the message+CRC is 0. This latter approach
369 * is used by a lot of hardware implementations, and is why so many
370 * protocols put the end-of-frame flag after the CRC.
372 * It's actually the same long division you learned in school, except that
373 * - We're working in binary, so the digits are only 0 and 1, and
374 * - When dividing polynomials, there are no carries. Rather than add and
375 * subtract, we just xor. Thus, we tend to get a bit sloppy about
376 * the difference between adding and subtracting.
378 * A 32-bit CRC polynomial is actually 33 bits long. But since it's
379 * 33 bits long, bit 32 is always going to be set, so usually the CRC
380 * is written in hex with the most significant bit omitted. (If you're
381 * familiar with the IEEE 754 floating-point format, it's the same idea.)
383 * Note that a CRC is computed over a string of *bits*, so you have
384 * to decide on the endianness of the bits within each byte. To get
385 * the best error-detecting properties, this should correspond to the
386 * order they're actually sent. For example, standard RS-232 serial is
387 * little-endian; the most significant bit (sometimes used for parity)
388 * is sent last. And when appending a CRC word to a message, you should
389 * do it in the right order, matching the endianness.
391 * Just like with ordinary division, the remainder is always smaller than
392 * the divisor (the CRC polynomial) you're dividing by. Each step of the
393 * division, you take one more digit (bit) of the dividend and append it
394 * to the current remainder. Then you figure out the appropriate multiple
395 * of the divisor to subtract to being the remainder back into range.
396 * In binary, it's easy - it has to be either 0 or 1, and to make the
397 * XOR cancel, it's just a copy of bit 32 of the remainder.
399 * When computing a CRC, we don't care about the quotient, so we can
400 * throw the quotient bit away, but subtract the appropriate multiple of
401 * the polynomial from the remainder and we're back to where we started,
402 * ready to process the next bit.
404 * A big-endian CRC written this way would be coded like:
405 * for (i = 0; i < input_bits; i++) {
406 * multiple = remainder & 0x80000000 ? CRCPOLY : 0;
407 * remainder = (remainder << 1 | next_input_bit()) ^ multiple;
409 * Notice how, to get at bit 32 of the shifted remainder, we look
410 * at bit 31 of the remainder *before* shifting it.
412 * But also notice how the next_input_bit() bits we're shifting into
413 * the remainder don't actually affect any decision-making until
414 * 32 bits later. Thus, the first 32 cycles of this are pretty boring.
415 * Also, to add the CRC to a message, we need a 32-bit-long hole for it at
416 * the end, so we have to add 32 extra cycles shifting in zeros at the
417 * end of every message,
419 * So the standard trick is to rearrage merging in the next_input_bit()
420 * until the moment it's needed. Then the first 32 cycles can be precomputed,
421 * and merging in the final 32 zero bits to make room for the CRC can be
423 * This changes the code to:
424 * for (i = 0; i < input_bits; i++) {
425 * remainder ^= next_input_bit() << 31;
426 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
427 * remainder = (remainder << 1) ^ multiple;
429 * With this optimization, the little-endian code is simpler:
430 * for (i = 0; i < input_bits; i++) {
431 * remainder ^= next_input_bit();
432 * multiple = (remainder & 1) ? CRCPOLY : 0;
433 * remainder = (remainder >> 1) ^ multiple;
436 * Note that the other details of endianness have been hidden in CRCPOLY
437 * (which must be bit-reversed) and next_input_bit().
439 * However, as long as next_input_bit is returning the bits in a sensible
440 * order, we can actually do the merging 8 or more bits at a time rather
441 * than one bit at a time:
442 * for (i = 0; i < input_bytes; i++) {
443 * remainder ^= next_input_byte() << 24;
444 * for (j = 0; j < 8; j++) {
445 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
446 * remainder = (remainder << 1) ^ multiple;
449 * Or in little-endian:
450 * for (i = 0; i < input_bytes; i++) {
451 * remainder ^= next_input_byte();
452 * for (j = 0; j < 8; j++) {
453 * multiple = (remainder & 1) ? CRCPOLY : 0;
454 * remainder = (remainder << 1) ^ multiple;
457 * If the input is a multiple of 32 bits, you can even XOR in a 32-bit
458 * word at a time and increase the inner loop count to 32.
460 * You can also mix and match the two loop styles, for example doing the
461 * bulk of a message byte-at-a-time and adding bit-at-a-time processing
462 * for any fractional bytes at the end.
464 * The only remaining optimization is to the byte-at-a-time table method.
465 * Here, rather than just shifting one bit of the remainder to decide
466 * in the correct multiple to subtract, we can shift a byte at a time.
467 * This produces a 40-bit (rather than a 33-bit) intermediate remainder,
468 * but again the multiple of the polynomial to subtract depends only on
469 * the high bits, the high 8 bits in this case.
471 * The multiple we need in that case is the low 32 bits of a 40-bit
472 * value whose high 8 bits are given, and which is a multiple of the
473 * generator polynomial. This is simply the CRC-32 of the given
476 * Two more details: normally, appending zero bits to a message which
477 * is already a multiple of a polynomial produces a larger multiple of that
478 * polynomial. To enable a CRC to detect this condition, it's common to
479 * invert the CRC before appending it. This makes the remainder of the
480 * message+crc come out not as zero, but some fixed non-zero value.
482 * The same problem applies to zero bits prepended to the message, and
483 * a similar solution is used. Instead of starting with a remainder of
484 * 0, an initial remainder of all ones is used. As long as you start
485 * the same way on decoding, it doesn't make a difference.
493 #if 0 /*Not used at present */
495 buf_dump(char const *prefix, unsigned char const *buf, size_t len)
497 fputs(prefix, stdout);
499 printf(" %02x", *buf++);
505 static void bytereverse(unsigned char *buf, size_t len)
508 unsigned char x = bitrev8(*buf);
513 static void random_garbage(unsigned char *buf, size_t len)
516 *buf++ = (unsigned char) random();
519 #if 0 /* Not used at present */
520 static void store_le(u32 x, unsigned char *buf)
522 buf[0] = (unsigned char) x;
523 buf[1] = (unsigned char) (x >> 8);
524 buf[2] = (unsigned char) (x >> 16);
525 buf[3] = (unsigned char) (x >> 24);
529 static void store_be(u32 x, unsigned char *buf)
531 buf[0] = (unsigned char) (x >> 24);
532 buf[1] = (unsigned char) (x >> 16);
533 buf[2] = (unsigned char) (x >> 8);
534 buf[3] = (unsigned char) x;
538 * This checks that CRC(buf + CRC(buf)) = 0, and that
539 * CRC commutes with bit-reversal. This has the side effect
540 * of bytewise bit-reversing the input buffer, and returns
541 * the CRC of the reversed buffer.
543 static u32 test_step(u32 init, unsigned char *buf, size_t len)
548 crc1 = crc32_be(init, buf, len);
549 store_be(crc1, buf + len);
550 crc2 = crc32_be(init, buf, len + 4);
552 printf("\nCRC cancellation fail: 0x%08x should be 0\n",
555 for (i = 0; i <= len + 4; i++) {
556 crc2 = crc32_be(init, buf, i);
557 crc2 = crc32_be(crc2, buf + i, len + 4 - i);
559 printf("\nCRC split fail: 0x%08x\n", crc2);
562 /* Now swap it around for the other test */
564 bytereverse(buf, len + 4);
565 init = bitrev32(init);
566 crc2 = bitrev32(crc1);
567 if (crc1 != bitrev32(crc2))
568 printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n",
569 crc1, crc2, bitrev32(crc2));
570 crc1 = crc32_le(init, buf, len);
572 printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1,
574 crc2 = crc32_le(init, buf, len + 4);
576 printf("\nCRC cancellation fail: 0x%08x should be 0\n",
579 for (i = 0; i <= len + 4; i++) {
580 crc2 = crc32_le(init, buf, i);
581 crc2 = crc32_le(crc2, buf + i, len + 4 - i);
583 printf("\nCRC split fail: 0x%08x\n", crc2);
595 unsigned char buf1[SIZE + 4];
596 unsigned char buf2[SIZE + 4];
597 unsigned char buf3[SIZE + 4];
599 u32 crc1, crc2, crc3;
601 for (i = 0; i <= SIZE; i++) {
602 printf("\rTesting length %d...", i);
604 random_garbage(buf1, i);
605 random_garbage(buf2, i);
606 for (j = 0; j < i; j++)
607 buf3[j] = buf1[j] ^ buf2[j];
609 crc1 = test_step(INIT1, buf1, i);
610 crc2 = test_step(INIT2, buf2, i);
611 /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */
612 crc3 = test_step(INIT1 ^ INIT2, buf3, i);
613 if (crc3 != (crc1 ^ crc2))
614 printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n",
617 printf("\nAll test complete. No failures expected.\n");
621 #endif /* UNITTEST */