Commit cbb42186 authored by Gaëtan Cassiers
Browse files

start v2 (temp)

parent 1ebc8093
......@@ -9,6 +9,6 @@ shadow 32bit skylake-avx512
shadow 128bit x86-64
shadow 128bit haswell
shadow 128bit skylake-avx512
shadow 256bit haswell
shadow 256bit skylake-avx512
shadow 512bit skylake-avx512
#shadow 256bit haswell
#shadow 256bit skylake-avx512
#shadow 512bit skylake-avx512
......@@ -8,9 +8,9 @@ clyde 32bit shadow 128bit haswell
clyde 32bit shadow 128bit skylake-avx512
clyde 64bit shadow 128bit haswell
clyde 64bit shadow 128bit skylake-avx512
clyde 32bit shadow 256bit haswell
clyde 32bit shadow 256bit skylake-avx512
clyde 64bit shadow 256bit haswell
clyde 64bit shadow 256bit skylake-avx512
clyde 32bit shadow 512bit skylake-avx512
clyde 64bit shadow 512bit skylake-avx512
#clyde 32bit shadow 256bit haswell
#clyde 32bit shadow 256bit skylake-avx512
#clyde 64bit shadow 256bit haswell
#clyde 64bit shadow 256bit skylake-avx512
#clyde 32bit shadow 512bit skylake-avx512
#clyde 64bit shadow 512bit skylake-avx512
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
* Copyright (c) 2019 2020 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -72,11 +72,12 @@ static void init_sponge_state(shadow_state state,
const unsigned char *n) {
// init state
memset(state, 0, SHADOW_NBYTES);
memcpy(state[0], p, P_NBYTES);
memcpy(state[1], n, CRYPTO_NPUBBYTES);
memcpy(state[1], p, P_NBYTES);
memcpy(state[2], n, CRYPTO_NPUBBYTES);
// TBC
memcpy(state[MLS_BUNDLES-1], n, CRYPTO_NPUBBYTES);
clyde128_encrypt(state[MLS_BUNDLES-1], state[0], k);
memcpy(state[0], n, CRYPTO_NPUBBYTES);
clyde128_encrypt(state[0], state[1], k);
// initial permutation
shadow(state);
}
......
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
* Copyright (c) 2019 2020 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -31,6 +31,8 @@
#define IACA_END
#endif
#include <xmmintrin.h>
#include "primitives.h"
#define SHADOW_NS 6 // Number of steps
......@@ -50,32 +52,8 @@ typedef struct __attribute__((aligned(64))) shadow_simd {
static void sbox_layer_simd(shadow_simd* simd);
static void lbox_simd(row_set* x, row_set* y);
static void lbox_layer_simd(shadow_simd* simd);
static void add_rc_simd(shadow_simd* simd, unsigned int round);
static void dbox_mls_layer_simd(shadow_simd *simd);
// Building blocks for the round-constant table below.
// RS0 is an all-zero row_set; RS1S holds 1, 2, 4, 8 in its four lanes,
// with the last lane zeroed when SMALL_PERM != 0.
#if SMALL_PERM==0
#define RS0 { 0, 0, 0, 0 }
#define RS1S { 1, 2, 4, 8 }
#else
#define RS0 { 0, 0, 0, 0 }
#define RS1S { 1, 2, 4, 0 }
#endif // SMALL_PERM==0
// Round constants in SIMD layout: entry [round] supplies one row_set
// (either RS0 or RS1S) for each of the four rows. add_rc_simd() XORs
// entry [round] row by row into the state.
static const shadow_simd shadow_simd_rc[SHADOW_NR] = {
{{ RS1S, RS0, RS0, RS0 }}, // 0
{{ RS0, RS1S, RS0, RS0 }}, // 1
{{ RS0, RS0, RS1S, RS0 }}, // 2
{{ RS0, RS0, RS0, RS1S }}, // 3
{{ RS1S, RS1S, RS0, RS0 }}, // 4
{{ RS0, RS1S, RS1S, RS0 }}, // 5
{{ RS0, RS0, RS1S, RS1S }}, // 6
{{ RS1S, RS1S, RS0, RS1S }}, // 7
{{ RS1S, RS0, RS1S, RS0 }}, // 8
{{ RS0, RS1S, RS0, RS1S }}, // 9
{{ RS1S, RS1S, RS1S, RS0 }}, // 10
{{ RS0, RS1S, RS1S, RS1S }} // 11
};
static void sbox_layer_simd(shadow_simd* simd) {
row_set y1 = (simd->rows[0] & simd->rows[1]) ^ simd->rows[2];
row_set y0 = (simd->rows[3] & simd->rows[0]) ^ simd->rows[1];
......@@ -111,29 +89,105 @@ static void lbox_layer_simd(shadow_simd* simd) {
lbox_simd(&simd->rows[2], &simd->rows[3]);
}
static void add_rc_simd(shadow_simd* simd, unsigned int round) {
for (unsigned int i = 0; i < LS_ROWS; i++) {
simd->rows[i] ^= shadow_simd_rc[round].rows[i];
}
// Lane-selection masks for the two-operand form of __builtin_shuffle used by
// transpose_state(): indices 0-3 select lanes of the first operand, indices
// 4-7 select lanes of the second operand.
static const row_set dbox_shuffle1 = { 0, 4, 1, 5 };  // interleave lanes 0,1 of both operands
static const row_set dbox_shuffle2 = { 2, 6, 3, 7 };  // interleave lanes 2,3 of both operands
static const row_set dbox_shuffle3 = { 0, 1, 4, 5 };  // lanes 0,1 of each operand, concatenated
static const row_set dbox_shuffle4 = { 2, 3, 6, 7 };  // lanes 2,3 of each operand, concatenated

// Transposes the 4x4 lane matrix held in simd->rows[0..3] in place:
// after the call, lane j of rows[i] holds what lane i of rows[j] held before.
//
// The two shuffle stages mirror the SSE2 sequence
// _mm_unpack{lo,hi}_epi32 followed by _mm_unpack{lo,hi}_epi64.
static void transpose_state(shadow_simd *simd) {
    // Snapshot the inputs so the stores below cannot disturb them.
    row_set in0 = simd->rows[0];
    row_set in1 = simd->rows[1];
    row_set in2 = simd->rows[2];
    row_set in3 = simd->rows[3];

    // Stage 1: interleave lanes of row pairs (0,1) and (2,3).
    row_set lo01 = __builtin_shuffle(in0, in1, dbox_shuffle1);
    row_set lo23 = __builtin_shuffle(in2, in3, dbox_shuffle1);
    row_set hi01 = __builtin_shuffle(in0, in1, dbox_shuffle2);
    row_set hi23 = __builtin_shuffle(in2, in3, dbox_shuffle2);

    // Stage 2: stitch the matching halves together into the output rows.
    simd->rows[0] = __builtin_shuffle(lo01, lo23, dbox_shuffle3);
    simd->rows[1] = __builtin_shuffle(lo01, lo23, dbox_shuffle4);
    simd->rows[2] = __builtin_shuffle(hi01, hi23, dbox_shuffle3);
    simd->rows[3] = __builtin_shuffle(hi01, hi23, dbox_shuffle4);
}
// Lane-wise computation of (x << 1) ^ b ^ (b << 8), where b = x >> 31.
// NOTE(review): presumably multiplication by x modulo x^32 + x^8 + 1 in each
// lane; that reading requires row_set lanes to be unsigned 32-bit so that b
// is 0 or 1 -- confirm against the row_set typedef.
static row_set xtime(row_set x) {
    row_set carry = x >> 31;
    row_set doubled = x << 1;
    doubled ^= carry;       // feedback into bit 0
    doubled ^= carry << 8;  // feedback into bit 8
    return doubled;
}
// Applies the D-box mixing step to the SIMD state in place.
//
// With SMALL_PERM == 0 the four row_sets rows[0..3] are mixed with XORs and
// three xtime() multiplications; otherwise only rows[0..2] are mixed (one
// xtime() call) and rows[3] is left untouched.
// NOTE(review): shadow() calls transpose_state() right before this function,
// so rows[] presumably holds one bundle per entry here -- confirm.
static void dbox_mls_layer_simd(shadow_simd *simd) {
#if SMALL_PERM==0
    // Work on local copies; the update order below is significant.
    row_set r0 = simd->rows[0];
    row_set r1 = simd->rows[1];
    row_set r2 = simd->rows[2];
    row_set r3 = simd->rows[3];

    r0 ^= r1;
    r2 ^= r3;
    r1 ^= r2;
    r3 ^= xtime(r0);
    r1 = xtime(r1);
    r0 ^= r1;
    r2 ^= xtime(r3);
    r1 ^= r2;
    r3 ^= r0;

    simd->rows[0] = r0;
    simd->rows[1] = r1;
    simd->rows[2] = r2;
    simd->rows[3] = r3;
#else
    row_set in0 = simd->rows[0];
    row_set in1 = simd->rows[1];
    row_set in2 = simd->rows[2];

    row_set sum01 = in0 ^ in1;
    row_set sum02 = in0 ^ in2;
    row_set mixed = sum01 ^ xtime(sum02);

    simd->rows[0] = sum02 ^ mixed;
    simd->rows[1] = in1 ^ sum02;
    simd->rows[2] = mixed;
#endif
}
// Row on which to XOR the constant in Shadow Round A
static const uint32_t SHADOW_RA_CST_ROW = 1;
// Bundle on which to XOR the constant in Shadow Round B
static const uint32_t SHADOW_RB_CST_BUNDLE = 0;
#if SMALL_PERM==0
static const row_set dbox_shuffle1 = { 1, 0, 0, 0 };
static const row_set dbox_shuffle2 = { 2, 2, 1, 1 };
static const row_set dbox_shuffle3 = { 3, 3, 3, 2 };
static const row_set SHADOW_CST_RA[SHADOW_NS] = {
{ 0xf8737400, 0xf0e6e8c5, 0xe1cdd14f, 0xc39ba25b },
{ 0x73744118, 0xe6e88230, 0xcdd104a5, 0x9ba2098f },
{ 0x74413cff, 0xe88279fe, 0xd104f339, 0xa209e6b7 },
{ 0x413cd9a4, 0x8279b348, 0x4f36655, 0x9e6ccaa },
{ 0x3cd99585, 0x79b32b0a, 0xf3665614, 0xe6ccaced },
{ 0xd99594cc, 0xb32b295d, 0x6656527f, 0xccaca4fe }
};
static const row_set SHADOW_CST_RB[SHADOW_NS] = {
{ 0x87374473, 0xe6e8823, 0x1cdd1046, 0x39ba208c },
{ 0x374413db, 0x6e8827b6, 0xdd104f6c, 0xba209e1d },
{ 0x4413cdab, 0x88279b56, 0x104f3669, 0x209e6cd2 },
{ 0x13cd9954, 0x279b32a8, 0x4f366550, 0x9e6ccaa0 },
{ 0xcd99591f, 0x9b32b2fb, 0x36656533, 0x6ccaca66 },
{ 0x99594939, 0x32b292b7, 0x6565256e, 0xcaca4adc }
};
#else
static const row_set dbox_shuffle1 = { 0, 0, 0, 3 };
static const row_set dbox_shuffle2 = { 1, 2, 1, 3 };
static const row_set dbox_shuffle3 = { 2, 3, 3, 3 };
static const row_set SHADOW_CST_RA[SHADOW_NS] = {
{ 0xf8737400, 0xf0e6e8c5, 0xe1cdd14f, 0x0 },
{ 0x39ba208c, 0x73744118, 0xe6e88230, 0x0 },
{ 0xdd104f6c, 0xba209e1d, 0x74413cff, 0x0 },
{ 0x88279b56, 0x104f3669, 0x209e6cd2, 0x0 },
{ 0x13cd9954, 0x279b32a8, 0x4f366550, 0x0 },
{ 0xe6ccaced, 0xcd99591f, 0x9b32b2fb, 0x0 }
};
static const row_set SHADOW_CST_RB[SHADOW_NS] = {
{ 0xc39ba25b, 0x87374473, 0xe6e8823, 0x1cdd1046 },
{ 0xcdd104a5, 0x9ba2098f, 0x374413db, 0x6e8827b6 },
{ 0xe88279fe, 0xd104f339, 0xa209e6b7, 0x4413cdab },
{ 0x413cd9a4, 0x8279b348, 0x4f36655, 0x9e6ccaa },
{ 0x9e6ccaa0, 0x3cd99585, 0x79b32b0a, 0xf3665614 },
{ 0x36656533, 0x6ccaca66, 0xd99594cc, 0xb32b295d }
};
#endif // SMALL_PERM==0
// Applies the D-box layer to every row of the SIMD state in place: each row
// is replaced by the XOR of three lane-permuted copies of itself, the
// permutations being given by the file-scope masks dbox_shuffle1..3.
static void dbox_mls_layer_simd(shadow_simd *simd) {
    unsigned int row = 0;
    while (row < LS_ROWS) {
        row_set cur = simd->rows[row];
        simd->rows[row] = __builtin_shuffle(cur, dbox_shuffle1)
                        ^ __builtin_shuffle(cur, dbox_shuffle2)
                        ^ __builtin_shuffle(cur, dbox_shuffle3);
        row++;
    }
}
void shadow(shadow_state state) {
#if SMALL_PERM==0
shadow_simd simd = {
......@@ -158,10 +212,12 @@ void shadow(shadow_state state) {
IACA_START
sbox_layer_simd(&simd);
lbox_layer_simd(&simd);
add_rc_simd(&simd, 2*s);
simd.rows[SHADOW_RA_CST_ROW] ^= SHADOW_CST_RA[s];
sbox_layer_simd(&simd);
transpose_state(&simd);
dbox_mls_layer_simd(&simd);
add_rc_simd(&simd, 2*s+1);
simd.rows[SHADOW_RB_CST_BUNDLE] ^= SHADOW_CST_RB[s];
transpose_state(&simd);
}
IACA_END
row_set res0 = { simd.rows[0][0], simd.rows[1][0], simd.rows[2][0], simd.rows[3][0] };
......
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
* Copyright (c) 2019 2020 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -48,6 +48,11 @@ typedef union __attribute__((aligned(64))) shadow_simd {
__m256i rowsi[2];
} shadow_simd;
// Overlays an __m128i with a row_set so the same bits can be read through
// either type; xtimew() uses it to run the row_set xtime() on an __m128i.
// NOTE(review): assumes row_set has the same size as __m128i -- confirm.
typedef union {
__m128i sse; // view as an SSE integer register value
row_set vec; // view as a row_set vector
} ssew;
// drow_set[0] = { B0R0, B1R0, B2R0, B3R0, B0R2, B1R2, B2R2, B3R2 }
// drow_set[1] = { B0R1, B1R1, B2R1, B3R1, B0R3, B1R3, B2R3, B3R3 }
......@@ -102,6 +107,17 @@ static void lbox_layer_simd(shadow_simd* simd) {
dlbox_simd(&simd->rows[0], &simd->rows[1]);
}
// Lane-wise computation of (x << 1) ^ b ^ (b << 8), where b = x >> 31.
// NOTE(review): presumably multiplication by x modulo x^32 + x^8 + 1 in each
// lane; that reading requires row_set lanes to be unsigned 32-bit so that b
// is 0 or 1 -- confirm against the row_set typedef.
static row_set xtime(row_set x) {
    row_set carry = x >> 31;
    row_set doubled = x << 1;
    doubled ^= carry;       // feedback into bit 0
    doubled ^= carry << 8;  // feedback into bit 8
    return doubled;
}
// Applies xtime() to an __m128i by reinterpreting it as a row_set through
// the ssew union and converting the result back.
static __m128i xtimew(__m128i x) {
    ssew pun = { .sse = x };
    pun.vec = xtime(pun.vec);
    return pun.sse;
}
static const drow_set sel_low = { 0, 1, 2, 3, 0, 1, 2, 3 };
static const drow_set sel_high = { 4, 5, 6, 7, 4, 5, 6, 7 };
static const drow_set dispatch = { 0, 1, 2, 3, 12, 13, 14, 15 };
......@@ -110,24 +126,21 @@ static void sbox_layer_simd(shadow_simd* simd) {
__m128i x2 = _mm256_extracti128_si256(simd->rowsi[0], 1);
__m128i x1 = _mm256_castsi256_si128(simd->rowsi[1]);
__m128i x3 = _mm256_extracti128_si256(simd->rowsi[1], 1);
__m128i y1 = (x0 & x1) ^ x2;
__m128i y0 = (x0 & x3) ^ x1;
__m128i y3 = (y1 & x3) ^ x0;
__m128i y2 = (y0 & y1) ^ x3;
/*
__m256i res0 = _mm256_castsi128_si256(y0);
res0 = _mm256_inserti32x4(res0, y2, 1);
simd->rowsi[0] = res0;
__m256i res1 = _mm256_castsi128_si256(y1);
res1 = _mm256_inserti32x4(res1, y3, 1);
simd->rowsi[1] = res1;
*/
simd->rowsi[0] = _mm256_castsi128_si256(y0);
//simd->rowsi[0] = _mm256_inserti32x4(simd->rowsi[0], y2, 1);
simd->rowsi[0] = _mm256_inserti128_si256(simd->rowsi[0], y2, 1);
simd->rowsi[1] = _mm256_castsi128_si256(y1);
//simd->rowsi[1] = _mm256_inserti32x4(simd->rowsi[1], y3, 1);
simd->rowsi[1] = _mm256_inserti128_si256(simd->rowsi[1], y3, 1);
x0 ^= x1;
x2 ^= x3;
x1 ^= x2;
x3 ^= xtimew(x0);
x1 = xtimew(x1);
x0 ^= x1;
x2 ^= xtimew(x3);
x1 ^= x2;
x3 ^= x0;
simd->rowsi[0] = _mm256_castsi128_si256(x0);
simd->rowsi[0] = _mm256_inserti128_si256(simd->rowsi[0], x2, 1);
simd->rowsi[1] = _mm256_castsi128_si256(x1);
simd->rowsi[1] = _mm256_inserti128_si256(simd->rowsi[1], x3, 1);
}
static void add_rc_simd(shadow_simd* simd, unsigned int round) {
for (unsigned int i = 0; i < 2; i++) {
......
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
* Copyright (c) 2019 2020 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -31,24 +31,6 @@
#define SHADOW_NS 6 // Number of steps
#define SHADOW_NR 2 * SHADOW_NS // Number of rounds
// Round constants for Clyde-128: one entry per round (CLYDE_128_NR entries),
// each holding one 0/1 word for each of the LS_ROWS rows.
// shadow() XORs entry [2*s] and entry [2*s+1] into every bundle through
// XORLSS, shifted left by the bundle index.
static const uint32_t clyde128_rc[CLYDE_128_NR][LS_ROWS] = {
{ 1, 0, 0, 0 }, // 0
{ 0, 1, 0, 0 }, // 1
{ 0, 0, 1, 0 }, // 2
{ 0, 0, 0, 1 }, // 3
{ 1, 1, 0, 0 }, // 4
{ 0, 1, 1, 0 }, // 5
{ 0, 0, 1, 1 }, // 6
{ 1, 1, 0, 1 }, // 7
{ 1, 0, 1, 0 }, // 8
{ 0, 1, 0, 1 }, // 9
{ 1, 1, 1, 0 }, // 10
{ 0, 1, 1, 1 } // 11
};
// Apply a S-box layer to a Clyde-128 state.
static void sbox_layer(uint32_t* state) {
uint32_t y1 = (state[0] & state[1]) ^ state[2];
......@@ -81,50 +63,75 @@ static void lbox(uint32_t* x, uint32_t* y) {
*y = b;
}
// Feedback mask of the constant-generation LFSR: XORed into the register
// whenever a 1 bit is shifted out of the top.
static const uint32_t CST_LFSR_POLY_MASK = 0xc5;
// Initial value of the constant-generation LFSR.
// This is the result of applying the LFSR update function 1024 times to
// the value 0x1.
static const uint32_t CST_LFSR_INIT_VALUE = 0xf8737400;
// Row on which to XOR the constant in Shadow Round A
static const uint32_t SHADOW_RA_CST_ROW = 1;
// Bundle on which to XOR the constant in Shadow Round B
static const uint32_t SHADOW_RB_CST_BUNDLE = 0;

// Update (by 1 step) the constant generation LFSR.
//
// lfsr: current 32-bit register value.
// Returns the next register value: lfsr shifted left by one bit, XORed with
// CST_LFSR_POLY_MASK when the bit shifted out was 1.
static uint32_t update_lfsr(uint32_t lfsr)
{
    // Branch-free mask: all ones when the top bit of lfsr is set, zero
    // otherwise. Equivalent to
    //   (lfsr & 0x80000000) ? 0xffffffff : 0x0
    // Computed with unsigned negation only, so it does not depend on the
    // implementation-defined behaviour of converting to a signed type or of
    // right-shifting a negative value.
    uint32_t b_out_ext = (uint32_t)0 - (lfsr >> 31);
    return (lfsr << 1) ^ (b_out_ext & CST_LFSR_POLY_MASK);
}
// Multiplies x by the polynomial "x" modulo x^32 + x^8 + 1: shift left by
// one bit and fold the bit that falls off the top back into bits 0 and 8.
static uint32_t xtime(uint32_t x)
{
    const uint32_t carry = x >> 31;  // 0 or 1: the bit shifted out
    uint32_t doubled = x << 1;
    doubled ^= carry;
    doubled ^= carry << 8;
    return doubled;
}
// Apply a D-box layer to a Shadow state.
static void dbox_mls_layer(shadow_state state) {
for (unsigned int row = 0; row < LS_ROWS; row++) {
static void dbox_mls_layer(shadow_state state, unsigned int row) {
#if SMALL_PERM
uint32_t x = state[0][row];
uint32_t y = state[1][row];
uint32_t z = state[2][row];
state[0][row] = x ^ y ^ z;
state[1][row] = x ^ z;
state[2][row] = x ^ y;
uint32_t x0 = state[0][row];
uint32_t x1 = state[1][row];
uint32_t x2 = state[2][row];
uint32_t a = x0 ^ x1;
uint32_t b = x0 ^ x2;
uint32_t c = x1 ^ b;
uint32_t d = a ^ xtime(b);
state[0][row] = b ^ d;
state[1][row] = c;
state[2][row] = d;
#else
uint32_t w = state[0][row];
uint32_t x = state[1][row];
uint32_t y = state[2][row];
uint32_t z = state[3][row];
uint32_t u = w ^ x;
uint32_t v = y ^ z;
state[0][row] = x ^ v;
state[1][row] = w ^ v;
state[2][row] = u ^ z;
state[3][row] = u ^ y;
state[0][row] ^= state[1][row];
state[2][row] ^= state[3][row];
state[1][row] ^= state[2][row];
state[3][row] ^= xtime(state[0][row]);
state[1][row] = xtime(state[1][row]);
state[0][row] ^= state[1][row];
state[2][row] ^= xtime(state[3][row]);
state[1][row] ^= state[2][row];
state[3][row] ^= state[0][row];
#endif // SMALL_PERM
}
}
// XOR-with-left-shift over four words: for i in 0..3,
//   DEST[i] ^= OP[i] << SHIFT.
// Each macro argument is evaluated four times, so pass only expressions
// without side effects.
#define XORLSS(DEST, OP, SHIFT) do { \
    for (unsigned int xorlss_row_ = 0; xorlss_row_ < 4; xorlss_row_++) { \
        (DEST)[xorlss_row_] ^= ((OP)[xorlss_row_] << (SHIFT)); \
    } } while (0)
// Shadow permutation. Updates state.
void shadow(shadow_state state) {
uint32_t lfsr = CST_LFSR_INIT_VALUE;
for (unsigned int s = 0; s < SHADOW_NS; s++) {
for (unsigned int b = 0; b < MLS_BUNDLES; b++) {
sbox_layer(state[b]);
lbox(&state[b][0], &state[b][1]);
lbox(&state[b][2], &state[b][3]);
XORLSS(state[b], clyde128_rc[2*s], b);
state[b][SHADOW_RA_CST_ROW] ^= lfsr;
lfsr = update_lfsr(lfsr);
sbox_layer(state[b]);
}
dbox_mls_layer(state);
for (unsigned int b = 0; b < MLS_BUNDLES; b++) {
XORLSS(state[b], clyde128_rc[2*s+1], b);
for (unsigned int row = 0; row < LS_ROWS; row++) {
dbox_mls_layer(state, row);
state[SHADOW_RB_CST_BUNDLE][row] ^= lfsr;
lfsr = update_lfsr(lfsr);
}
}
}
CC?=gcc
#CFLAGS:=-std=c99 -Wall -Wextra -g $(CFLAGS)
CFLAGS:=-std=c99 -g $(CFLAGS)
CFLAGS:=-std=c99 -g $(CFLAGS) -flax-vector-conversions
BDIR?=../build_test
SRCDIR?=../src
......
......@@ -29,16 +29,16 @@ popd > /dev/null
test_all_spook_versions()
{
test_build_spook 0 0 c744322005f6d6df1846bc4baa4033047856d8183f502a65604711dd25e87b8c
test_build_spook 0 1 5bd32e37cd41cfd48b6e9fc740c56c32a3154e8acdf7e05310bae9d8f213f41c
test_build_spook 1 0 410c79bf206274bf6145103d1e87c20e17d258cd77c550fd0d33ef30a971c460
test_build_spook 1 1 53d431a078490a709767c0089614fcda87218f11e97ab565d2e84f25bbfcd9cc
test_build_spook 0 0 c9f76c914bbd916c7479493a632cbe1518355bec1564ec99ec6a9e85778f1335
test_build_spook 0 1 b2b7f17c393abfb78a99dc99301667479d82abb7c5f4a57b3060bcc502a772d6
test_build_spook 1 0 3b72ee641cc670ac6e9b1c7abbc707783f006b5d8df272c658f953d62e2d1066
test_build_spook 1 1 d94e35dfee178c385a92eaf5847a81f9c962963cfe3a9bfa849f094c579cdafe
}
test_big_spook_versions()
{
test_build_spook 0 0 c744322005f6d6df1846bc4baa4033047856d8183f502a65604711dd25e87b8c
test_build_spook 1 0 410c79bf206274bf6145103d1e87c20e17d258cd77c550fd0d33ef30a971c460
test_build_spook 0 0 c9f76c914bbd916c7479493a632cbe1518355bec1564ec99ec6a9e85778f1335
test_build_spook 1 0 3b72ee641cc670ac6e9b1c7abbc707783f006b5d8df272c658f953d62e2d1066
}
for ctype in 32bit 64bit
......@@ -49,9 +49,9 @@ do
export SHADOW_TYPE=shadow_$stype;
test_all_spook_versions;
done
for stype in 256bit 512bit
do
export SHADOW_TYPE=shadow_$stype;
test_big_spook_versions;
done
#for stype in 256bit 512bit
#do
# export SHADOW_TYPE=shadow_$stype;
# test_big_spook_versions;
#done
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment