Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
61 changes: 61 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,64 @@
Changes from 20170105: colm* implementations.

Changes from 20161220: More deoxysi* implementations.

Changes from 20161218: Finally included the tweaked version of Ascon.
Sorry for letting this slip through the cracks before!

Changes from 20161026:

* New k277mon and k277taa.
* crypto_aead/norx*v3: fix neon compilation errors.
* crypto_aead/norx*v3: handle overlap better.
* More checksums.

Changes from 20161009:

* CAESAR tweaks: acorn, cloc+silc, deoxys, ketje, morus, norx.
* New *keyak* reference implementation.
* New *jambu* naming.
* New SHA-2 implementations from Dolbeau.
* New crypto_dh/gls254prot/opt implementation.

Changes from 20160910:

* New crypto_dh/k298.
* New crypto_dh/gls254/opt implementation.

Changes from 20160806:

* Bug fixes for *keyakv2/ARM*.
* Many more compiler options.

Beware that this version will take considerably longer to run than the previous version.

Changes from 20160731:

* hs1siv{,lo}v2: faster implementations from Romain Dolbeau.
* Various AVX512 implementations: fixes from Romain Dolbeau.
* stribob192r2/neon/wbob_pineon.c: fixes.
* *poet* fast implementations: fixes.

Changes from 20160724:

* trivia0v2: faster implementation (sse4).
* hs1*v2: bug fixes and faster implementations.
* aeadaes*ocbtaglen128v1: bug fixes in dolbeau implementations.
* pi*v2: bug fixes in goptv implementations.
* *keyakv2: bug fixes in several implementations.
* simonjambu*: bug fixes.
* aarch64 PMCCNTR_EL0 support from Romain Dolbeau.
* aarch64 CNTVCT_EL0 preliminary support.

Changes from 20160715:

* New crypto_aead/*jambu*v2.
* Fixes for crypto_aead/pi*v2.
* Fixes for crypto_aead/hs1*.
* TWEAK_LOW_LATENCY for crypto_aead/norx*.
* Initial aarch64 support from Romain Dolbeau.
* Prioritize scaling_setspeed over scaling_max_freq in reports.

Changes from 20141014:

* Updated poly1305/moon.
Expand Down
35 changes: 35 additions & 0 deletions cpucycles/cortex.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ D. J. Bernstein
Public domain.
*/

/*
* To use this, your kernel must enable reading the cycle counter
* from userland code.
*
* Sample code for Armv7 (32 bits):
* <https://github.com/thoughtpolice/enable_arm_pmu>
*
* Sample code for Armv8/Aarch64 (64 bits):
* <https://github.com/rdolbeau/enable_arm_pmu>
*/

#define SCALE 1
#include <time.h>
#include <unistd.h>
Expand All @@ -16,6 +27,29 @@ static unsigned long long prevcycles = 0;
static int now[3];
static long long cyclespersec = 0;

#if defined(__aarch64__)
/*
 * Return the current value of the AArch64 PMCCNTR_EL0 register.
 * As the note at the top of this file says, the kernel must allow
 * userland code to read the cycle counter for this to work.
 */
long long cpucycles_cortex(void)
{
  long long cycles;

  asm volatile("mrs %0, PMCCNTR_EL0" : "=r" (cycles));
  return cycles;
}
/*
 * Measure the counter rate: sample gettimeofday() and cpucycles_cortex()
 * on both sides of a sleep(1) and return the counter delta divided by
 * the elapsed time in seconds.  Each call therefore blocks for the
 * duration of the sleep.
 */
long long cpucycles_cortex_persecond(void) {
  struct timeval start,stop;
  long long cycles_start,cycles_stop;
  double seconds_start,seconds_stop;

  gettimeofday(&start,(struct timezone *) 0);
  cycles_start = cpucycles_cortex();
  sleep(1);
  gettimeofday(&stop,(struct timezone *) 0);
  cycles_stop = cpucycles_cortex();

  /* Fold each timeval into fractional seconds. */
  seconds_start = (double) start.tv_sec + ((double) start.tv_usec) / 1000000.0;
  seconds_stop = (double) stop.tv_sec + ((double) stop.tv_usec) / 1000000.0;

  return (cycles_stop - cycles_start) / (seconds_stop - seconds_start);
}
#else
static void readticks(unsigned int *result)
{
struct timeval t;
Expand Down Expand Up @@ -71,3 +105,4 @@ long long cpucycles_cortex_persecond(void)
while (!cyclespersec) cpucycles_cortex();
return cyclespersec * SCALE;
}
#endif
50 changes: 50 additions & 0 deletions cpucycles/cortex_vct.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
cpucycles/cortex_vct.c version 20101203
D. J. Bernstein
Romain Dolbeau
Public domain.
*/

#define SCALE 1
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>

#if defined(__aarch64__)
/* Return the value of the AArch64 CNTFRQ_EL0 register. */
static inline unsigned long long aarch64_timer_get_cntfrq(void) {
  unsigned long long freq;

  asm volatile("mrs %0, CNTFRQ_EL0" : "=r" (freq));
  return freq;
}

/* Factor applied to every CNTVCT_EL0 reading before it is returned. */
#define V8FREQ 1

/* Return the AArch64 CNTVCT_EL0 register value multiplied by V8FREQ. */
long long cpucycles_cortex_vct(void)
{
  long long ticks;

  asm volatile("mrs %0, CNTVCT_EL0" : "=r" (ticks));
  ticks = ticks * V8FREQ;
  return ticks;
}
/*
 * Report the tick rate of cpucycles_cortex_vct().
 *
 * The rate is measured across a sleep(1) using gettimeofday(), then
 * compared with the value read from CNTFRQ_EL0.  When the two differ by
 * fewer than 100 the register value is returned; otherwise the measured
 * rate is returned (the original author flagged that case as an
 * unresolved "fixme").  Each call blocks for the duration of the sleep.
 */
long long cpucycles_cortex_vct_persecond(void) {
  struct timeval start,stop;
  long long ticks_start,ticks_stop;
  long long measured;
  unsigned long long advertised;
  double seconds_start,seconds_stop;

  gettimeofday(&start,(struct timezone *) 0);
  ticks_start = cpucycles_cortex_vct();
  sleep(1);
  gettimeofday(&stop,(struct timezone *) 0);
  ticks_stop = cpucycles_cortex_vct();

  /* Fold each timeval into fractional seconds. */
  seconds_start = (double) start.tv_sec + ((double) start.tv_usec) / 1000000.0;
  seconds_stop = (double) stop.tv_sec + ((double) stop.tv_usec) / 1000000.0;

  measured = (ticks_stop - ticks_start) / (seconds_stop - seconds_start);
  advertised = aarch64_timer_get_cntfrq();

  /* Measurement disagrees with the register: fall back to the measurement. */
  if (llabs(advertised - measured) >= 100)
    return measured;
  return advertised;
}
#endif
28 changes: 28 additions & 0 deletions cpucycles/cortex_vct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
cpucycles cortex_vct.h version 20100912
D. J. Bernstein
Romain Dolbeau
Public domain.
*/

/* Include guard. */
#ifndef CPUCYCLES_cortex_vct_h
#define CPUCYCLES_cortex_vct_h

/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Current counter reading of the cortex_vct implementation. */
extern long long cpucycles_cortex_vct(void);
/* Number of counter ticks per second for cpucycles_cortex_vct(). */
extern long long cpucycles_cortex_vct_persecond(void);

#ifdef __cplusplus
}
#endif

/*
 * Map the generic cpucycles names onto this implementation, unless
 * cpucycles_implementation has already been defined.
 */
#ifndef cpucycles_implementation
#define cpucycles_implementation "cortex_vct"
#define cpucycles cpucycles_cortex_vct
#define cpucycles_persecond cpucycles_cortex_vct_persecond
#endif

#endif
4 changes: 4 additions & 0 deletions cpucycles/do
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ okabi | (
arm*)
echo cortex
echo dev4ns
;;
aarch64)
echo cortex
echo vct
;;
esac

Expand Down
86 changes: 86 additions & 0 deletions cpucycles/vct.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include "osfreq.c"

/*
 * Round u to a nearby long.  The conversion truncates toward zero, then
 * the result is stepped by one while it is more than 0.5 away from u,
 * so exact halves keep the truncated value.
 */
static long myround(double u)
{
  long nearest = (long) u;

  while (u > nearest + 0.5) ++nearest;
  while (u < nearest - 0.5) --nearest;
  return nearest;
}

/* Return the time reported by gettimeofday(), expressed in microseconds. */
static long long microseconds(void)
{
  struct timeval now;
  long long total;

  gettimeofday(&now,(struct timezone *) 0);
  total = now.tv_sec * (long long) 1000000;
  total += now.tv_usec;
  return total;
}

/* Right shift applied to every raw counter reading in timebase(). */
static int tbshift = 0;

/* Read the AArch64 CNTVCT_EL0 register and shift it right by tbshift bits. */
static long long timebase(void)
{
  unsigned long long raw;

  asm volatile("mrs %0, CNTVCT_EL0" : "=r" (raw));
  raw >>= tbshift;
  return raw;
}

/* Frequency obtained from osfreq(); 0 until init() has fetched one. */
static double cpufrequency = 0;
/* Multiplier applied to timebase() readings; 0 means "not calibrated". */
static long tbcycles = 0;

/*
 * Estimate the ratio between cpufrequency and the timebase() tick rate.
 *
 * Spins until at least 10000 microseconds and at least 1000 timebase
 * ticks have elapsed, then returns
 *   cpufrequency * elapsed_seconds / elapsed_ticks
 * (cycles per tick, assuming cpufrequency is in cycles per second).
 * Returns 0 if the tick counter did not advance.
 */
static double guesstbcycles(void)
{
  long long tick_start,tick_end;
  long long usec_start,usec_end;
  long long ticks,usecs;

  tick_start = timebase();
  usec_start = microseconds();
  for (;;) {
    tick_end = timebase();
    usec_end = microseconds();
    if (usec_end - usec_start >= 10000 && tick_end - tick_start >= 1000) break;
  }
  if (tick_end <= tick_start) return 0;

  ticks = tick_end - tick_start;
  usecs = usec_end - usec_start;
  return (cpufrequency * 0.000001 * (double) usecs) / (double) ticks;
}

/*
 * Calibrate tbshift and tbcycles.
 *
 * Fetches cpufrequency from osfreq() and gives up if it is zero.  Then,
 * for each tbshift from 0 to 9, makes up to 100 attempts: take two
 * guesstbcycles() estimates, round the first into tbcycles, and accept
 * when both estimates lie within 0.1 of that integer.  If no attempt is
 * accepted, tbcycles is reset to 0.
 */
static void init(void)
{
  int attempt;
  double first;
  double second;

  cpufrequency = osfreq();
  if (!cpufrequency) return;

  tbshift = 0;
  while (tbshift < 10) {
    for (attempt = 0;attempt < 100;++attempt) {
      first = guesstbcycles();
      second = guesstbcycles();
      tbcycles = myround(first);
      /* Reject the attempt when either estimate is too far from the integer. */
      if (first - tbcycles > 0.1 || tbcycles - first > 0.1) continue;
      if (second - tbcycles > 0.1 || tbcycles - second > 0.1) continue;
      return;
    }
    ++tbshift;
  }
  tbcycles = 0;
}

/*
 * Return the current timebase() reading multiplied by tbcycles.
 * Runs init() first whenever tbcycles is still zero.
 */
long long cpucycles_vct(void)
{
  long long ticks;

  if (tbcycles == 0) init();
  ticks = timebase();
  return ticks * tbcycles;
}

/*
 * Return cpufrequency converted to long long.
 * Runs init() first whenever tbcycles is still zero.
 */
long long cpucycles_vct_persecond(void)
{
  if (tbcycles == 0) init();
  return (long long) cpufrequency;
}
28 changes: 28 additions & 0 deletions cpucycles/vct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
cpucycles vct.h version 20160726
D. J. Bernstein
Romain Dolbeau
Public domain.
*/

/* Include guard. */
#ifndef CPUCYCLES_vct_h
#define CPUCYCLES_vct_h

/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Current counter reading of the vct implementation. */
extern long long cpucycles_vct(void);
/* Number of counter units per second for cpucycles_vct(). */
extern long long cpucycles_vct_persecond(void);

#ifdef __cplusplus
}
#endif

/*
 * Map the generic cpucycles names onto this implementation, unless
 * cpucycles_implementation has already been defined.
 */
#ifndef cpucycles_implementation
#define cpucycles_implementation "vct"
#define cpucycles cpucycles_vct
#define cpucycles_persecond cpucycles_vct_persecond
#endif

#endif
Empty file.
Loading