src/liblzma/check/crc32.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       crc32.c
   4 /// \brief      CRC32 calculation
   5 //
   6 //  This code has been put into the public domain.
   7 //
   8 //  This library is distributed in the hope that it will be useful,
   9 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11 //
  12 ///////////////////////////////////////////////////////////////////////////////
  13
  14 #include "check.h"
  15 #include "crc_macros.h"
  16
  17
  18 // If you make any changes, do some bench marking! Seemingly unrelated
  19 // changes can very easily ruin the performance (and very probably is
  20 // very compiler dependent).
  21 extern uint32_t
  22 lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
  23 {
  24         crc = ~crc;
  25
  26 #ifdef WORDS_BIGENDIAN
  27         crc = bswap_32(crc);
  28 #endif
  29
  30         if (size > 8) {
  31                 // Fix the alignment, if needed. The if statement above
  32                 // ensures that this won't read past the end of buf[].
  33                 while ((uintptr_t)(buf) & 7) {
  34                         crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
  35                         --size;
  36                 }
  37
  38                 // Calculate the position where to stop.
  39                 const uint8_t *const limit = buf + (size & ~(size_t)(7));
  40
  41                 // Calculate how many bytes must be calculated separately
  42                 // before returning the result.
  43                 size &= (size_t)(7);
  44
  45                 // Calculate the CRC32 using the slice-by-eight algorithm.
  46                 // It is explained in this document:
  47                 // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
  48                 //
  49                 // The code below is different than the code in Intel's
  50                 // paper, but the principle is identical. This should be
  51                 // faster with GCC than Intel's code. This is tested only
  52                 // with GCC 3.4.6 and 4.1.2 on x86, so your results may vary.
  53                 //
  54                 // Using -Os and -fomit-frame-pointer seem to give the best
  55                 // results at least with GCC 4.1.2 on x86. It's sill far
  56                 // from the speed of hand-optimized assembler.
  57                 while (buf < limit) {
  58                         crc ^= *(uint32_t *)(buf);
  59                         buf += 4;
  60
  61                         crc = lzma_crc32_table[7][A(crc)]
  62                             ^ lzma_crc32_table[6][B(crc)]
  63                             ^ lzma_crc32_table[5][C(crc)]
  64                             ^ lzma_crc32_table[4][D(crc)];
  65
  66                         const uint32_t tmp = *(uint32_t *)(buf);
  67                         buf += 4;
  68
  69                         // It is critical for performance, that
  70                         // the crc variable is XORed between the
  71                         // two table-lookup pairs.
  72                         crc = lzma_crc32_table[3][A(tmp)]
  73                             ^ lzma_crc32_table[2][B(tmp)]
  74                             ^ crc
  75                             ^ lzma_crc32_table[1][C(tmp)]
  76                             ^ lzma_crc32_table[0][D(tmp)];
  77                 }
  78         }
  79
  80         while (size-- != 0)
  81                 crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
  82
  83 #ifdef WORDS_BIGENDIAN
  84         crc = bswap_32(crc);
  85 #endif
  86
  87         return ~crc;
  88 }