// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32 implementation with Zbc extension.
 *
 * Copyright (C) 2024 Intel Corporation
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>

#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>

/*
 * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
 * a better understanding of how this math works.
 *
 * let "+" denote polynomial add (XOR)
 * let "-" denote polynomial sub (XOR)
 * let "*" denote polynomial multiplication
 * let "/" denote polynomial floor division
 * let "S" denote the source data, XLEN bits wide
 * let "P" denote the CRC32 polynomial
 * let "T" denote 2^(XLEN+32)
 * let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
 *
 * crc32(S, P)
 * => S * (2^32) - S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) / P * P
 * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
 * => lowest 32 bits of: S * (2^32) * quotient / T * P
 * => lowest 32 bits of: S * quotient / 2^XLEN * P
 * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
 * => clmul_low_part(clmul_high_part(S, QT) + S, P)
 *
 * In terms of the implementations below, the BE case is more intuitive, since
 * the higher-order bit sits at the more significant position.
 */
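
/*
 * Illustrative only (not used below; the helper name is made up): a portable-C
 * sketch of what the Zbc "clmul" instruction computes, i.e. the low XLEN bits
 * of the carry-less (GF(2)) product that the derivation above relies on.
 * "clmulh" keeps the high XLEN bits of the same product instead.
 */
static inline unsigned long clmul_low_sketch(unsigned long a, unsigned long b)
{
	unsigned long r = 0;

	for (int i = 0; i < __riscv_xlen; i++)
		if ((b >> i) & 1)
			r ^= a << i;	/* XOR-accumulate: add without carries */
	return r;
}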

#if __riscv_xlen == 64
/* Slide by XLEN bits per iteration */
# define STEP_ORDER 3

/* Each polynomial quotient below has an implicit bit for 2^XLEN */

/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
# define CRC32_POLY_QT_LE 0x5a72d812fb808b20

/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8

/*
 * Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it should be
 * the same as the bit-reversed version of CRC32_POLY_QT_LE.
 */
# define CRC32_POLY_QT_BE 0x04d101df481b4e5a

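/*
 * Combine the running CRC with the next XLEN-bit little-endian word of input,
 * so that a single carry-less-multiply step can fold both at once.
 */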
static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
}

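/*
 * Fold one XLEN-bit chunk: the bit-reflected (LE) form of the Barrett step
 * derived above, clmul_low_part(clmul_high_part(S, QT) + S, P).
 */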
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      "srli %0, %0, 32\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" ((u64)poly << 32)
		      :);
	return crc;
}

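/* As crc32_le_prep(), but for the big-endian (non-reflected) bit order. */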
static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
}

#elif __riscv_xlen == 32
# define STEP_ORDER 2
/* Each quotient should match the upper half of its analog in RV64 */
# define CRC32_POLY_QT_LE 0xfb808b20
# define CRC32C_POLY_QT_LE 0x6f5389f8
# define CRC32_POLY_QT_BE 0x04d101df

static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_le32(*ptr);
}

static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
	u32 crc;

	/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmul %0, %1, %2\n"
		      "slli %0, %0, 1\n"
		      "xor %0, %0, %1\n"
		      "clmulr %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (poly_qt),
			"r" (poly)
		      :);
	return crc;
}

static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
	return crc ^ (__force u32)__cpu_to_be32(*ptr);
}

#else
# error "Unexpected __riscv_xlen"
#endif

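/*
 * BE folding step: clmulh/clmul give the high and low parts of the carry-less
 * products directly, so no extra shifts are needed here.
 */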
static inline u32 crc32_be_zbc(unsigned long s)
{
	u32 crc;

	asm volatile (".option push\n"
		      ".option arch,+zbc\n"
		      "clmulh %0, %1, %2\n"
		      "xor %0, %0, %1\n"
		      "clmul %0, %0, %3\n"
		      ".option pop\n"
		      : "=&r" (crc)
		      : "r" (s),
			"r" (CRC32_POLY_QT_BE),
			"r" (CRC32_POLY_BE)
		      :);
	return crc;
}

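/* Bytes per main-loop iteration, and the matching alignment mask. */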
#define STEP (1 << STEP_ORDER)
#define OFFSET_MASK (STEP - 1)

typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);

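/*
 * Handle 1..STEP-1 leading or trailing bytes: gather them into a single
 * XLEN-bit word and run one folding step on it.
 */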
static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
				     size_t len, u32 poly,
				     unsigned long poly_qt)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);

	s ^= (unsigned long)crc << (__riscv_xlen - bits);
	if (__riscv_xlen == 32 || len < sizeof(u32))
		crc_low = crc >> bits;

	crc = crc32_le_zbc(s, poly, poly_qt);
	crc ^= crc_low;

	return crc;
}

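/*
 * Common LE driver: runtime-patched (via ALTERNATIVE) to jump to the passed-in
 * fallback when Zbc is unavailable; otherwise it processes an unaligned head,
 * an XLEN-bits-per-iteration body and a short tail.
 */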
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
					  size_t len, u32 poly,
					  unsigned long poly_qt,
					  fallback crc_fb)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

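	/* Fold the aligned body, XLEN bits per iteration. */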
	for (int i = 0; i < len; i++) {
		s = crc32_le_prep(crc, p_ul);
		crc = crc32_le_zbc(s, poly, poly_qt);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);

	return crc;

legacy:
	return crc_fb(crc, p, len);
}

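/* crc32_le()/__crc32c_le(): pick the polynomial, quotient and fallback. */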
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
				crc32_le_base);
}

u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
				CRC32C_POLY_QT_LE, __crc32c_le_base);
}

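/* BE counterpart of crc32_le_unaligned(). */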
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
				     size_t len)
{
	size_t bits = len * 8;
	unsigned long s = 0;
	u32 crc_low = 0;

	for (int i = 0; i < len; i++)
		s = *p++ | (s << 8);

	if (__riscv_xlen == 32 || len < sizeof(u32)) {
		s ^= crc >> (32 - bits);
		crc_low = crc << bits;
	} else {
		s ^= (unsigned long)crc << (bits - 32);
	}

	crc = crc32_be_zbc(s);
	crc ^= crc_low;

	return crc;
}

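/* BE driver; mirrors crc32_le_generic() with the BE helpers and fallback. */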
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
	size_t offset, head_len, tail_len;
	unsigned long const *p_ul;
	unsigned long s;

	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
			     RISCV_ISA_EXT_ZBC, 1)
		 : : : : legacy);

	/* Handle the unaligned head. */
	offset = (unsigned long)p & OFFSET_MASK;
	if (offset && len) {
		head_len = min(STEP - offset, len);
		crc = crc32_be_unaligned(crc, p, head_len);
		p += head_len;
		len -= head_len;
	}

	tail_len = len & OFFSET_MASK;
	len = len >> STEP_ORDER;
	p_ul = (unsigned long const *)p;

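	/* Fold the aligned body, XLEN bits per iteration. */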
	for (int i = 0; i < len; i++) {
		s = crc32_be_prep(crc, p_ul);
		crc = crc32_be_zbc(s);
		p_ul++;
	}

	/* Handle the tail bytes. */
	p = (unsigned char const *)p_ul;
	if (tail_len)
		crc = crc32_be_unaligned(crc, p, tail_len);

	return crc;

legacy:
	return crc32_be_base(crc, p, len);
}