Commit 5a5ca8cd authored by Gaëtan Cassiers
Browse files

Add benchmarking

parent dd5f6e06
build_test/
prim_bench_iaca/
prim_bench_real/
spook_bench/
#! /bin/sh
# Benchmark the primitive implementations listed in the benchmark-set files.
#
# Two passes are run:
#   1. IACA pass: each primitive is compiled with -D BENCH_IACA and the
#      object file is analysed with `iaca`; the "Block Throughput" value is
#      collected into $BENCH_DIR_IACA/results.txt.
#   2. Real pass: each primitive is linked against the bench_<type>.c harness,
#      executed, and the reported ns/iter value is collected into
#      $BENCH_DIR_REAL/results.txt.
# Finally prim_res_analysis.py renders both result files.
#
# Each non-comment line of a benchmark-set file is "<type> <variant> <arch>";
# the compiled source is ../src/<type>_<variant>.c with -march=<arch>.
CC=gcc
CFLAGS="-std=gnu99 -g -Ofast -mtune=skylake-avx512"
BENCH_DIR_IACA=../prim_bench_iaca
BENCH_DIR_REAL=../prim_bench_real
export PROC_FREQ=2.0 # Processor frequency (GHz)

# IACA simulation
echo "IACA-based benchmark"
BENCH_RUNS=prim_bench_set_iaca
rm -rf "$BENCH_DIR_IACA"
mkdir -p "$BENCH_DIR_IACA"
grep -v '^#' < "$BENCH_RUNS" | while read -r line
do
    # $line is intentionally unquoted: word splitting fills $2..$4.
    set -- x $line
    # Skip blank or incomplete lines instead of compiling a bogus file name.
    [ "$#" -ge 4 ] || continue
    TYPE=$2
    PRIM=${TYPE}_$3
    ARCH=$4
    echo "bench $TYPE $PRIM $ARCH ..."
    FULLNAME=./$BENCH_DIR_IACA/$PRIM-$ARCH-iaca
    # $CFLAGS is intentionally unquoted: it holds several options.
    $CC $CFLAGS "-march=$ARCH" -D BENCH_IACA -c "../src/$PRIM.c" -o "$FULLNAME.o"
    iaca -arch SKX "$FULLNAME.o" > "$FULLNAME.txt"
done
# The glob is expanded once, before results.txt is created, so the results
# file itself is never part of the iteration.
for f in "$BENCH_DIR_IACA"/*.txt
do
    [ -e "$f" ] || continue # no report at all: the pattern stayed literal
    # File name without directory, cut at the first '.'.
    INSTANCE=${f##*/}
    INSTANCE=${INSTANCE%%.*}
    CYCLES=$(grep 'Block Throughput' "$f" | cut -d ' ' -f 3)
    # Unquoted on purpose: an empty $CYCLES must not leave a trailing space,
    # the analysis script splits these lines on single spaces.
    echo $INSTANCE $CYCLES >> "$BENCH_DIR_IACA/results.txt"
done

# Real benchmark
echo "Execution-based benchmark"
BENCH_RUNS=prim_bench_set_real
N_ITER=100000000
rm -rf "$BENCH_DIR_REAL"
mkdir -p "$BENCH_DIR_REAL"
grep -v '^#' < "$BENCH_RUNS" | while read -r line
do
    set -- x $line
    [ "$#" -ge 4 ] || continue
    TYPE=$2
    PRIM=${TYPE}_$3
    ARCH=$4
    echo "bench $TYPE $PRIM $ARCH ..."
    FULLNAME=./$BENCH_DIR_REAL/$PRIM-$ARCH-bench
    BENCH_HARNESS=bench_$TYPE
    $CC $CFLAGS "-march=$ARCH" -c "../src/$PRIM.c" -o "$FULLNAME.o"
    $CC $CFLAGS "-march=$ARCH" -D "N_ITER=$N_ITER" -I ../src -c "./src/$BENCH_HARNESS.c" -o "$BENCH_DIR_REAL/$BENCH_HARNESS.o"
    $CC $CFLAGS "-march=$ARCH" "$FULLNAME.o" "$BENCH_DIR_REAL/$BENCH_HARNESS.o" -o "$FULLNAME"
    # A failed build leaves an empty report; the analysis treats it as missing.
    "$FULLNAME" > "$FULLNAME.txt"
done
for f in "$BENCH_DIR_REAL"/*.txt
do
    [ -e "$f" ] || continue
    INSTANCE=${f##*/}
    INSTANCE=${INSTANCE%%.*}
    # The harness prints "ns/iter: <value>"; keep the value only.
    NSITER=$(cut -d ' ' -f 2 < "$f")
    # Unquoted on purpose, see the IACA loop above.
    echo $INSTANCE $NSITER >> "$BENCH_DIR_REAL/results.txt"
done

# Results analysis
export IACA_RES_FILE=$BENCH_DIR_IACA/results.txt
export REAL_RES_FILE=$BENCH_DIR_REAL/results.txt
python3 prim_res_analysis.py
clyde 32bit x86-64
clyde 32bit haswell
clyde 32bit skylake-avx512
clyde 64bit haswell
clyde 64bit skylake-avx512
shadow 128bit x86-64
shadow 128bit haswell
shadow 128bit skylake-avx512
shadow 256bit haswell
shadow 256bit skylake-avx512
shadow 512bit skylake-avx512
clyde 32bit x86-64
clyde 32bit haswell
clyde 32bit skylake-avx512
clyde 64bit haswell
clyde 64bit skylake-avx512
shadow 32bit x86-64
shadow 32bit haswell
shadow 32bit skylake-avx512
shadow 128bit x86-64
shadow 128bit haswell
shadow 128bit skylake-avx512
shadow 256bit haswell
shadow 256bit skylake-avx512
shadow 512bit skylake-avx512
#! /usr/bin/python3
import os
# Processor frequency in GHz; multiplies measured ns/iter to obtain cycles.
PROC_FREQ = float(os.getenv('PROC_FREQ', '2.0'))
# Number of steps in clyde/shadow (i.e. number of iterations of the iaca loop).
PRIM_NS = 6
# Result files written by the benchmark script; overridable via environment.
IACA_RES_FILE = os.getenv('IACA_RES_FILE', '../prim_bench_iaca/results.txt')
REAL_RES_FILE = os.getenv('REAL_RES_FILE', '../prim_bench_real/results.txt')
def parse_prim_id(s):
    """Split a benchmark instance name into (prim, prim_implem, arch).

    The name has the form ``<prim>_<variant>-<arch>-<suffix>``; the trailing
    ``-<suffix>`` component is dropped and the architecture may itself contain
    dashes (e.g. ``x86-64``).
    """
    fields = s.split('-')
    fields.pop()  # discard the trailing suffix component
    prim_implem, *arch_parts = fields
    prim = prim_implem.split('_')[0]
    return (prim, prim_implem, '-'.join(arch_parts))
def fmt_cycles(cycles):
    """Render a cycle count as a string with two decimal places."""
    return format(cycles, '.2f')
def parse_real_line(s):
    """Parse one ``<instance> <ns/iter>`` line of the execution results file.

    Returns ``(parse_prim_id(instance), cell)`` where ``cell`` is the time
    converted to cycles (ns * PROC_FREQ) and formatted, or ``' '`` when the
    line does not carry a usable value (failed benchmark).
    """
    fields = s.split(' ')
    # Fallback for a missing or unparsable value: the whole line is the id.
    name, cell = s.strip(), ' '
    if len(fields) == 2:
        try:
            cell = fmt_cycles(PROC_FREQ * float(fields[1]))
            name = fields[0]
        except ValueError:
            pass
    return (parse_prim_id(name), cell)
def parse_iaca_line(s):
    """Parse one ``<instance> <cycles>`` line of the IACA results file.

    The per-block cycle count is multiplied by PRIM_NS and formatted.

    Returns ``(parse_prim_id(instance), cell)``. When the cycle count is
    missing or not a number (e.g. the IACA run failed and produced no
    throughput line), ``cell`` is ``' '`` so the report shows an empty cell
    instead of the whole analysis aborting, mirroring ``parse_real_line``.
    """
    try:
        prim_id, cycles = s.split(' ')[:2]
        val = fmt_cycles(PRIM_NS*float(cycles))
    except ValueError:
        # Either only the instance name is present or the value is garbage.
        prim_id = s.split(' ', 1)[0]
        val = ' '
    return (parse_prim_id(prim_id), val)
def render_markdown_table(headers, rows, data):
    """Render a Markdown table as a single string.

    ``headers`` is the list of column titles, ``rows`` the row labels and
    ``data`` the list of cell lists (one per row label). The first column
    holds the row labels under a blank header cell.
    """
    header_line = [' '] + headers
    separator = ['-'] * (len(headers) + 1)
    body = [[label] + cells for label, cells in zip(rows, data)]
    lines = []
    for row in [header_line, separator] + body:
        lines.append('|' + '|'.join(row) + '|')
    return '\n'.join(lines)
def _load_results(path, parse):
    """Read a results file and map each parsed key to its formatted value."""
    with open(path) as res_file:
        return dict(parse(line) for line in res_file.read().splitlines())


def _lookup_table(results, prim, implems, archs):
    """Build one row per implementation, one cell per architecture."""
    table = []
    for implem in implems:
        table.append([results.get((prim, implem, arch), ' ') for arch in archs])
    return table


iaca_results = _load_results(IACA_RES_FILE, parse_iaca_line)
real_results = _load_results(REAL_RES_FILE, parse_real_line)

# The implementations listed come from the execution results only.
clyde_implems = sorted({key[1] for key in real_results if key[0] == 'clyde'})
shadow_implems = sorted({key[1] for key in real_results if key[0] == 'shadow'})
archs = ['x86-64', 'haswell', 'skylake-avx512']

clyde_iaca_table = _lookup_table(iaca_results, 'clyde', clyde_implems, archs)
shadow_iaca_table = _lookup_table(iaca_results, 'shadow', shadow_implems, archs)
clyde_real_table = _lookup_table(real_results, 'clyde', clyde_implems, archs)
shadow_real_table = _lookup_table(real_results, 'shadow', shadow_implems, archs)

for title, implems, table in (
        ('clyde iaca:\n\n', clyde_implems, clyde_iaca_table),
        ('clyde real:\n\n', clyde_implems, clyde_real_table),
        ('shadow iaca:\n\n', shadow_implems, shadow_iaca_table),
        ('shadow real:\n\n', shadow_implems, shadow_real_table)):
    print(title, render_markdown_table(archs, implems, table), '\n')
#! /bin/sh
# Benchmark full Spook encryption for each clyde/shadow implementation pair
# listed in the benchmark-set file, then summarise with
# spook_max_throughput.py.
#
# Each non-comment line of $BENCH_RUNS is
#   "clyde <clyde variant> shadow <shadow variant> <arch>"
# Only fields 2, 4 and 5 of that line are read (variants and arch).
CC=gcc
CFLAGS="-std=gnu99 -g -Ofast -mtune=skylake-avx512"
BENCH_DIR=../spook_bench
BENCH_RUNS=spook_bench_set
export PROC_FREQ=2.0 # Processor frequency (GHz)
#N_ITER=100000000
N_ITER=1000000
rm -rf "$BENCH_DIR"
mkdir -p "$BENCH_DIR"
grep -v '^#' < "$BENCH_RUNS" | while read -r line
do
    # $line is intentionally unquoted: word splitting fills $2..$6.
    set -- x $line
    # Skip blank or incomplete lines instead of compiling a bogus file name.
    [ "$#" -ge 6 ] || continue
    CLYDE=clyde_$3
    SHADOW=shadow_$5
    ARCH=$6
    echo "bench $CLYDE $SHADOW $ARCH ..."
    # True bench
    FULLNAME=./$BENCH_DIR/$CLYDE-$SHADOW-$ARCH-bench
    BENCH_HARNESS=bench_spook
    CLYDE_O=$BENCH_DIR/$CLYDE-$ARCH.o
    SHADOW_O=$BENCH_DIR/$SHADOW-$ARCH.o
    # $CFLAGS is intentionally unquoted: it holds several options.
    $CC $CFLAGS "-march=$ARCH" -c "../src/$CLYDE.c" -o "$CLYDE_O"
    $CC $CFLAGS "-march=$ARCH" -c "../src/$SHADOW.c" -o "$SHADOW_O"
    $CC $CFLAGS "-march=$ARCH" -c ../src/s1p.c -o "$BENCH_DIR/s1p-$ARCH.o"
    $CC $CFLAGS "-march=$ARCH" -c ../src/encrypt.c -o "$BENCH_DIR/encrypt-$ARCH.o"
    $CC $CFLAGS "-march=$ARCH" -I ../src -D "N_ITER=$N_ITER" -c "src/$BENCH_HARNESS.c" -o "$BENCH_DIR/$BENCH_HARNESS-$ARCH.o"
    $CC $CFLAGS "-march=$ARCH" -flto "$CLYDE_O" "$SHADOW_O" "$BENCH_DIR/s1p-$ARCH.o" "$BENCH_DIR/encrypt-$ARCH.o" "$BENCH_DIR/$BENCH_HARNESS-$ARCH.o" -o "$FULLNAME"
    # A failed build leaves an empty report, which contributes no result line.
    "$FULLNAME" > "$FULLNAME.txt"
done
# The glob is expanded once, before results.txt is created, so the results
# file itself is never part of the iteration.
for f in "$BENCH_DIR"/*.txt
do
    [ -e "$f" ] || continue # no report at all: the pattern stayed literal
    # File name without directory, cut at the first '.'.
    INSTANCE=${f##*/}
    INSTANCE=${INSTANCE%%.*}
    # Prefix every measurement line with the instance it belongs to.
    while read -r line
    do
        echo "$INSTANCE | $line" >> "$BENCH_DIR/results.txt"
    done < "$f"
done
# Results analysis
export RES_FILE=$BENCH_DIR/results.txt
python3 spook_max_throughput.py
clyde 32bit shadow 32bit x86-64
clyde 32bit shadow 32bit haswell
clyde 32bit shadow 32bit skylake-avx512
clyde 32bit shadow 128bit x86-64
clyde 32bit shadow 128bit haswell
clyde 32bit shadow 128bit skylake-avx512
clyde 64bit shadow 128bit haswell
clyde 64bit shadow 128bit skylake-avx512
#! /usr/bin/python3
import os
# Processor frequency in GHz; multiplies measured ns/byte to obtain cycles/byte.
PROC_FREQ = float(os.getenv('PROC_FREQ', '2.0'))
# Results file written by the benchmark script; overridable via environment.
RES_FILE = os.getenv('RES_FILE', '../spook_bench/results.txt')
def parse_spook_id(s):
    """Split a Spook benchmark instance name into its components.

    The name has the form ``<clyde impl>-<shadow impl>-<arch>-<suffix>``; the
    trailing suffix is dropped and the architecture may contain dashes.
    Returns ``((clyde, shadow), '<clyde impl>-<shadow impl>', arch)`` where
    ``clyde``/``shadow`` are the parts before the first underscore.
    """
    fields = s.split('-')
    fields.pop()  # discard the trailing suffix component
    clyde_f, shadow_f, *arch_parts = fields
    family = (clyde_f.split('_')[0], shadow_f.split('_')[0])
    implem = '-'.join((clyde_f, shadow_f))
    return (family, implem, '-'.join(arch_parts))
def fmt_cycles(cycles):
    """Render a cycle count as a string with two decimal places."""
    return format(cycles, '.2f')
def parse_line(s):
    """Parse one ``<instance> | <measurement>`` line of the results file.

    The measurement has six space-separated tokens, of which the first
    (message size in bytes) and the last (ns/byte) are used.

    Returns ``((implem, arch), val)`` where ``val`` is
    ``(n_bytes, cycles_per_byte_string)``, or ``None`` when the line does not
    contain exactly one ``|`` separator (missing value).
    """
    pieces = s.split('|')
    if len(pieces) == 2:
        spook_id, res = pieces
        n_bytes, _, _, _, _, ns_byte = res.strip().split(' ')
        # ns/byte * GHz = cycles/byte
        throughput = fmt_cycles(PROC_FREQ * float(ns_byte))
        val = (int(n_bytes), throughput)
    else:
        # Missing value due to failed benchmark
        spook_id = s.strip()
        val = None
    _, implem, arch = parse_spook_id(spook_id.strip())
    return ((implem, arch), val)
def render_markdown_table(headers, rows, data):
    """Render a Markdown table as a single string.

    ``headers`` is the list of column titles, ``rows`` the row labels and
    ``data`` the list of cell lists (one per row label). The first column
    holds the row labels under a blank header cell.
    """
    header_line = [' '] + headers
    separator = ['-'] * (len(headers) + 1)
    body = [[label] + cells for label, cells in zip(rows, data)]
    lines = []
    for row in [header_line, separator] + body:
        lines.append('|' + '|'.join(row) + '|')
    return '\n'.join(lines)
# results[(implem, arch)][message size in bytes] = formatted cycles/byte
results = dict()
with open(RES_FILE) as res_file:
    for (implem, arch), val in map(parse_line, res_file.read().splitlines()):
        if val is not None:
            n_bytes, cycles_per_byte = val
            results.setdefault((implem, arch), dict())[n_bytes] = cycles_per_byte
implems = list(sorted(set(implem for implem, _ in results.keys())))
archs = ['x86-64', 'haswell', 'skylake-avx512']
# Best throughput = smallest cycles/byte over all message sizes. The stored
# values are formatted strings, so they must be compared numerically
# (key=float): a plain string comparison would rank '10.20' below '9.50'.
max_throughput_table = [
    [str(min(results.get((implem, arch), dict()).values(), key=float, default=' '))
     for arch in archs]
    for implem in implems]
print(
    'max throughput (cycles/byte):\n\n',
    render_markdown_table(archs, implems, max_throughput_table), '\n'
)
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "primitives.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <stdint.h>
#ifndef N_ITER
#define N_ITER (10*1000*1000)
#endif // N_ITER
typedef struct timespec timespec;

/* Average duration of one iteration, in nanoseconds.
 *
 * t0     timestamp taken before the measured loop
 * tend   timestamp taken after the measured loop
 * n_iter number of iterations executed between the two timestamps
 */
double time_per_iter(timespec* t0, timespec* tend, uint64_t n_iter) {
    const int64_t ns_per_sec = 1000*1000*1000;
    int64_t whole_sec = tend->tv_sec - t0->tv_sec;
    int64_t elapsed_ns = whole_sec * ns_per_sec;
    /* The nanosecond delta may be negative; the seconds part compensates. */
    elapsed_ns += tend->tv_nsec - t0->tv_nsec;
    return (double) elapsed_ns / (double) n_iter;
}
/* Time N_ITER back-to-back calls to clyde128_encrypt and print the average
 * process-CPU time per call as "ns/iter: <value>". */
int main(void) {
    /* The initializer values are immediately overwritten: the memset below
     * zeroes the first 16 bytes of the state before the timed loop. */
    clyde128_state block = {0x3020100, 0x7060504, 0xb0a0908, 0xf0e0d0c};
    clyde128_state tweak = { 0, 0, 0, 0};
    memset(block, 0, 16);
    unsigned char key[16]= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    uint64_t rounds = N_ITER;

    timespec start, stop;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    for (uint64_t done = 0; done < rounds; done++) {
        clyde128_encrypt(block, tweak, key);
    }
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);

    printf("ns/iter: %.4g\n", time_per_iter(&start, &stop, rounds));
    return 0;
}
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "primitives.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <stdint.h>
#ifndef N_ITER
#define N_ITER (10*1000*1000)
#endif // N_ITER
typedef struct timespec timespec;

/* Average duration of one iteration, in nanoseconds.
 *
 * t0     timestamp taken before the measured loop
 * tend   timestamp taken after the measured loop
 * n_iter number of iterations executed between the two timestamps
 */
double time_per_iter(timespec* t0, timespec* tend, uint64_t n_iter) {
    const int64_t ns_per_sec = 1000*1000*1000;
    int64_t whole_sec = tend->tv_sec - t0->tv_sec;
    int64_t elapsed_ns = whole_sec * ns_per_sec;
    /* The nanosecond delta may be negative; the seconds part compensates. */
    elapsed_ns += tend->tv_nsec - t0->tv_nsec;
    return (double) elapsed_ns / (double) n_iter;
}
/* Time N_ITER back-to-back calls to shadow() on a zero-initialised state and
 * print the average process-CPU time per call as "ns/iter: <value>". */
int main(void) {
    shadow_state perm_state;
    memset(perm_state, 0, sizeof(perm_state));
    uint64_t rounds = N_ITER;

    timespec start, stop;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    for (uint64_t done = 0; done < rounds; done++) {
        shadow(perm_state);
    }
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop);

    printf("ns/iter: %.4g\n", time_per_iter(&start, &stop, rounds));
    return 0;
}
/* MIT License
*
* Copyright (c) 2019 Gaëtan Cassiers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <stdint.h>
#include <stdlib.h>
#include "api.h"
#include "crypto_aead.h"
#ifndef N_ITER
#define N_ITER (10*1000*1000)
#endif // N_ITER
typedef struct timespec timespec;

/* Average duration of one iteration, in nanoseconds.
 *
 * t0     timestamp taken before the measured loop
 * tend   timestamp taken after the measured loop
 * n_iter number of iterations executed between the two timestamps
 */
double time_per_iter(timespec* t0, timespec* tend, uint64_t n_iter) {
    const int64_t ns_per_sec = 1000*1000*1000;
    int64_t whole_sec = tend->tv_sec - t0->tv_sec;
    int64_t elapsed_ns = whole_sec * ns_per_sec;
    /* The nanosecond delta may be negative; the seconds part compensates. */
    elapsed_ns += tend->tv_nsec - t0->tv_nsec;
    return (double) elapsed_ns / (double) n_iter;
}
/* Measure crypto_aead_encrypt on a message of `size` bytes.
 *
 * size   message length in bytes; the message is filled with i % 256
 * n_iter number of encryptions performed inside the timed region
 *
 * Returns the average process-CPU time per encryption in nanoseconds.
 * No associated data is passed (NULL, length 0). The ciphertext buffer is
 * allocated with 16 bytes more than the message. Both buffers are released
 * before returning, and the process exits with an error message if either
 * allocation fails.
 */
double bench_spook(uint32_t size, uint32_t n_iter) {
    unsigned char k[16]= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    unsigned char nonce[16]= {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
    unsigned char *m = malloc(size);
    unsigned char *c = malloc((size_t) size + 16);
    /* Explicit check rather than assert(): it must survive -DNDEBUG. */
    if (m == NULL || c == NULL) {
        fprintf(stderr, "bench_spook: out of memory\n");
        free(m);
        free(c);
        exit(EXIT_FAILURE);
    }
    unsigned long long mlen = size;
    for (uint32_t i=0; i<size; i++) {
        m[i] = i%256;
    }
    unsigned long long clen;
    timespec t0, tend;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t0);
    for (uint32_t i=0; i<n_iter; i++) {
        crypto_aead_encrypt(c, &clen, m, mlen, NULL, 0, NULL, nonce, k);
    }
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tend);
    double perf = time_per_iter(&t0, &tend, n_iter);
    /* This function is called once per message size: do not leak buffers. */
    free(c);
    free(m);
    return perf;
}
/* Benchmark encryption for message sizes 2^4 .. 2^19 bytes and print one
 * line per size: "<size> bytes, ns/iter: <t>, ns/byte: <t/size>".
 * The iteration count is scaled down with the message size (N_ITER / size). */
int main(void) {
    const uint32_t min_shift = 4;
    const uint32_t max_shift = 19;
    for (uint32_t shift = min_shift; shift <= max_shift; shift++) {
        uint32_t size = UINT32_C(1) << shift;
        uint32_t n_iter = N_ITER / size;
        /* With a small N_ITER the quotient is 0, which would time an empty
         * loop and divide by zero; always measure at least one call. */
        if (n_iter == 0) {
            n_iter = 1;
        }
        double perf = bench_spook(size, n_iter);
        /* size is unsigned: print it with %u, not %i. */
        printf("%u bytes, ns/iter: %.4g, ns/byte: %.4g\n", (unsigned) size, perf, perf/size);
    }
    return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment