diff options
-rw-r--r-- | btrfs.c | 2 | ||||
-rw-r--r-- | crc32c.c | 125 | ||||
-rw-r--r-- | crc32c.h | 1 |
3 files changed, 126 insertions, 2 deletions
@@ -258,6 +258,8 @@ int main(int argc, char **argv) { const struct cmd_struct *cmd; + crc32c_optimization_init(); + argc--; argv++; handle_options(&argc, &argv); @@ -9,6 +9,123 @@ */ #include "kerncompat.h" #include "crc32c.h" +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + /* crc_function is the implementation that crc32c_le() dispatches to; it starts out as the table-driven __crc32c_le. */ +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length); +static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le; + +#ifdef __x86_64__ + +/* + * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com> + * + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. + * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + */ +#if __SIZEOF_LONG__ == 8 +#define REX_PRE "0x48, " +#define SCALE_F 8 +#else +#define REX_PRE +#define SCALE_F 4 +#endif + +static int crc32c_probed = 0; +static int crc32c_intel_available = 0; + /* Byte-wise path: executes the hard-coded opcode bytes once per input byte, with crc pinned to the S register constraint (as both input and output) and the data byte passed through the c constraint. Returns the updated crc. */ +static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data, + unsigned long length) +{ + while (length--) { + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + :"=S"(crc) + :"0"(crc), "c"(*data) + ); + data++; + } + + return crc; +} + +/* + * Consumes the buffer SCALE_F bytes per iteration through the hard-coded + * opcode bytes (REX_PRE widens the operand when long is 8 bytes), then + * hands the remaining length % SCALE_F bytes to crc32c_intel_le_hw_byte(). + * Returns the updated crc. + * + * NOTE(review): iquotient is an unsigned int while length is an unsigned + * long, so length / SCALE_F is truncated if it exceeds UINT_MAX - confirm + * that callers never pass buffers that large. + * NOTE(review): the signature uses unsigned long and uint32_t while the + * crc_function pointer type uses size_t and u32 - presumably the same + * types on the x86_64 targets this is built for; verify. 
+ */ +uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length) +{ + unsigned int iquotient = length / SCALE_F; + unsigned int iremainder = length % SCALE_F; + unsigned long *ptmp = (unsigned long *)data; + + while (iquotient--) { + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (iremainder) + crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} + /* Executes CPUID with the value read from *eax as the leaf number and writes the resulting eax, ebx, ecx and edx values back through the four pointers. */ +static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, + unsigned int *edx) +{ + int id = *eax; + + asm("movl %4, %%eax;" + "cpuid;" + "movl %%eax, %0;" + "movl %%ebx, %1;" + "movl %%ecx, %2;" + "movl %%edx, %3;" + : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) + : "r" (id) + : "eax", "ebx", "ecx", "edx"); +} + /* One-shot probe: on the first call runs CPUID leaf 1 and stores whether ECX bit 20 (the SSE4.2 feature flag per the Intel SDM) is set in crc32c_intel_available; crc32c_probed turns later calls into no-ops. */ +void crc32c_intel_probe(void) +{ + if (!crc32c_probed) { + unsigned int eax, ebx, ecx, edx; + + eax = 1; + + do_cpuid(&eax, &ebx, &ecx, &edx); + crc32c_intel_available = (ecx & (1 << 20)) != 0; + crc32c_probed = 1; + } +} + /* Probes the CPU and, when the hardware path is available, repoints crc_function at crc32c_intel. NOTE(review): declared int but has no return statement, so callers must not rely on its value. */ +int crc32c_optimization_init(void) +{ + crc32c_intel_probe(); + if (crc32c_intel_available) + crc_function = crc32c_intel; +} +#else + /* Stub for targets other than x86_64: crc_function keeps its initial value. NOTE(review): declared int but has no return statement, so callers must not rely on its value. */ +int crc32c_optimization_init(void) +{ +} + +#endif /* __x86_64__ */ /* * This is the CRC-32C table @@ -91,11 +208,15 @@ static const u32 crc32c_table[256] = { * crc using table. */ -u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length) { while (length--) crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); - return crc; } + /* Public entry point: forwards its arguments to whichever implementation crc_function currently points at and returns its result. */ +u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) +{ + return crc_function(crc, data, length); +} @@ -22,6 +22,7 @@ #include "kerncompat.h" u32 crc32c_le(u32 seed, unsigned char const *data, size_t length); +int crc32c_optimization_init(void); #define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length) #define btrfs_crc32c crc32c |