1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| #ifdef __x86_64__ __attribute__((target("sse2,pclmul"))) #endif inline void gfmul (__m128i a, __m128i b, __m128i *res){ __m128i tmp3, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12; __m128i XMMMASK = _mm_setr_epi32(0xffffffff, 0x0, 0x0, 0x0); mul128(a, b, &tmp3, &tmp6); tmp7 = _mm_srli_epi32(tmp6, 31); tmp8 = _mm_srli_epi32(tmp6, 30); tmp9 = _mm_srli_epi32(tmp6, 25); tmp7 = _mm_xor_si128(tmp7, tmp8); tmp7 = _mm_xor_si128(tmp7, tmp9); tmp8 = _mm_shuffle_epi32(tmp7, 147);
tmp7 = _mm_and_si128(XMMMASK, tmp8); tmp8 = _mm_andnot_si128(XMMMASK, tmp8); tmp3 = _mm_xor_si128(tmp3, tmp8); tmp6 = _mm_xor_si128(tmp6, tmp7); tmp10 = _mm_slli_epi32(tmp6, 1); tmp3 = _mm_xor_si128(tmp3, tmp10); tmp11 = _mm_slli_epi32(tmp6, 2); tmp3 = _mm_xor_si128(tmp3, tmp11); tmp12 = _mm_slli_epi32(tmp6, 7); tmp3 = _mm_xor_si128(tmp3, tmp12);
*res = _mm_xor_si128(tmp3, tmp6); }
|