4761 lines
105 KiB
C
4761 lines
105 KiB
C
/****************************************************************************
|
|
* Ralink Tech Inc.
|
|
* Taiwan, R.O.C.
|
|
*
|
|
* (c) Copyright 2002, Ralink Technology, Inc.
|
|
*
|
|
* All rights reserved. Ralink's source code is an unpublished work and the
|
|
* use of a copyright notice does not imply otherwise. This source code
|
|
* contains confidential trade secret material of Ralink Tech. Any attemp
|
|
* or participation in deciphering, decoding, reverse engineering or in any
|
|
* way altering the source code is stricitly prohibited, unless the prior
|
|
* written consent of Ralink Technology, Inc. is obtained.
|
|
***************************************************************************/
|
|
|
|
#ifndef CRYPT_GPL_ALGORITHM
|
|
/****************************************************************************
|
|
Module Name:
|
|
DH
|
|
|
|
Abstract:
|
|
RFC 2631: Diffie-Hellman Key Agreement Method
|
|
|
|
Revision History:
|
|
Who When What
|
|
-------- ---------- ------------------------------------------
|
|
Eddy 2009/01/21 Create Diffie-Hellman
|
|
***************************************************************************/
|
|
#endif /* CRYPT_GPL_ALGORITHM */
|
|
|
|
|
|
#include "crypt_dh.h"
|
|
#include "crypt_biginteger.h"
|
|
|
|
#ifdef CRYPT_GPL_ALGORITHM
|
|
|
|
/*dh_key.h START*/
|
|
#define GETPID_IS_MEANINGLESS
|
|
|
|
#ifndef DHPRINT
|
|
#define DHPRINT(fmt, args...) DBGPRINT(RT_DEBUG_OFF, (fmt, ## args))
|
|
#endif
|
|
|
|
typedef struct bignum_st BIGNUM;
|
|
typedef struct bn_mont_ctx_st BN_MONT_CTX;
|
|
typedef struct env_md_st EVP_MD;
|
|
typedef struct env_md_ctx_st EVP_MD_CTX;
|
|
typedef struct dh_st DH;
|
|
|
|
#define BN_MUL_COMBA
|
|
#define BN_SQR_COMBA
|
|
|
|
#define BN_ULLONG unsigned long
|
|
#define BN_ULONG unsigned int
|
|
#define BN_BITS 64
|
|
#define BN_BYTES 4
|
|
#define BN_BITS2 32
|
|
#define BN_BITS4 16
|
|
#define BN_MASK (0xffffffffffffffffLL)
|
|
#define BN_MASK2 (0xffffffffL)
|
|
#define BN_MASK2l (0xffff)
|
|
#define BN_MASK2h1 (0xffff8000L)
|
|
#define BN_MASK2h (0xffff0000L)
|
|
#define BN_TBIT (0x80000000L)
|
|
#define BN_FLG_MALLOCED 0x01
|
|
#define BN_FLG_STATIC_DATA 0x02
|
|
#define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings
|
|
* (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
|
|
#define BN_set_flags(b,n) ((b)->flags|=(n))
|
|
#define BN_get_flags(b,n) ((b)->flags&(n))
|
|
/* get a clone of a BIGNUM with changed flags, for *temporary* use only
|
|
* (the two BIGNUMs cannot not be used in parallel!) */
|
|
#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \
|
|
(dest)->top=(b)->top, \
|
|
(dest)->dmax=(b)->dmax, \
|
|
(dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \
|
|
| ((b)->flags & ~BN_FLG_MALLOCED) \
|
|
| BN_FLG_STATIC_DATA \
|
|
| (n)))
|
|
|
|
struct bignum_st
|
|
{
|
|
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
|
|
int top; /* Index of last used d +1. */
|
|
/* The next are internal book keeping for bn_expand. */
|
|
int dmax; /* Size of the d array. */
|
|
int flags;
|
|
};
|
|
|
|
/* Used for montgomery multiplication */
|
|
struct bn_mont_ctx_st
|
|
{
|
|
int ri; /* number of bits in R */
|
|
BIGNUM RR; /* used to convert to montgomery form */
|
|
BIGNUM N; /* The modulus */
|
|
BN_ULONG n0; /* least significant word of Ni */
|
|
int flags;
|
|
};
|
|
|
|
#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
|
|
#define BN_is_zero(a) ((a)->top == 0)
|
|
#define BN_is_one(a) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(1)))
|
|
#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
|
|
#define BN_one(a) (BN_set_word((a),1))
|
|
#define BN_zero(a) (BN_set_word((a),0))
|
|
#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
|
|
#define bn_correct_top(a) \
|
|
{ \
|
|
BN_ULONG *ftl; \
|
|
if ((a)->top > 0) \
|
|
{ \
|
|
for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
|
|
if (*(ftl--)) break; \
|
|
} \
|
|
}
|
|
|
|
/*
|
|
* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
|
|
*
|
|
*
|
|
* For window size 'w' (w >= 2) and a random 'b' bits exponent,
|
|
* the number of multiplications is a constant plus on average
|
|
*
|
|
* 2^(w-1) + (b-w)/(w+1);
|
|
*
|
|
* here 2^(w-1) is for precomputing the table (we actually need
|
|
* entries only for windows that have the lowest bit set), and
|
|
* (b-w)/(w+1) is an approximation for the expected number of
|
|
* w-bit windows, not counting the first one.
|
|
*
|
|
* Thus we should use
|
|
*
|
|
* w >= 6 if b > 671
|
|
* w = 5 if 671 > b > 239
|
|
* w = 4 if 239 > b > 79
|
|
* w = 3 if 79 > b > 23
|
|
* w <= 2 if 23 > b
|
|
*
|
|
* (with draws in between). Very small exponents are often selected
|
|
* with low Hamming weight, so we use w = 1 for b <= 23.
|
|
*/
|
|
#define BN_window_bits_for_exponent_size(b) \
|
|
((b) > 671 ? 6 : \
|
|
(b) > 239 ? 5 : \
|
|
(b) > 79 ? 4 : \
|
|
(b) > 23 ? 3 : 1)
|
|
/* BN_mod_exp_mont_conttime is based on the assumption that the
|
|
* L1 data cache line width of the target processor is at least
|
|
* the following value.
|
|
*/
|
|
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
|
|
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
|
|
/* Window sizes optimized for fixed window size modular exponentiation
|
|
* algorithm (BN_mod_exp_mont_consttime).
|
|
*
|
|
* To achieve the security goals of BN_mode_exp_mont_consttime, the
|
|
* maximum size of the window must not exceed
|
|
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
|
|
*
|
|
* Window size thresholds are defined for cache line sizes of 32 and 64,
|
|
* cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
|
|
* window size of 7 should only be used on processors that have a 128
|
|
* byte or greater cache line size.
|
|
*/
|
|
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
|
|
# define BN_window_bits_for_ctime_exponent_size(b) \
|
|
((b) > 937 ? 6 : \
|
|
(b) > 306 ? 5 : \
|
|
(b) > 89 ? 4 : \
|
|
(b) > 22 ? 3 : 1)
|
|
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
|
|
#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
|
|
# define BN_window_bits_for_ctime_exponent_size(b) \
|
|
((b) > 306 ? 5 : \
|
|
(b) > 89 ? 4 : \
|
|
(b) > 22 ? 3 : 1)
|
|
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
|
|
#endif
|
|
#define BN_MULL_SIZE_NORMAL (16) /* 32 */
|
|
#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
|
|
#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
|
|
|
|
/*************************************************************
|
|
* Using the long long type
|
|
*/
|
|
#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
|
|
#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
|
|
|
|
/*************************************************************
|
|
* No long long type
|
|
*/
|
|
|
|
#define LBITS(a) ((a)&BN_MASK2l)
|
|
#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
|
#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
|
|
|
#define LLBITS(a) ((a)&BN_MASKl)
|
|
#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
|
|
#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
|
|
|
|
#define mul64(l,h,bl,bh) \
|
|
{ \
|
|
BN_ULONG m,m1,lt,ht; \
|
|
\
|
|
lt=l; \
|
|
ht=h; \
|
|
m =(bh)*(lt); \
|
|
lt=(bl)*(lt); \
|
|
m1=(bl)*(ht); \
|
|
ht =(bh)*(ht); \
|
|
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
|
|
ht+=HBITS(m); \
|
|
m1=L2HBITS(m); \
|
|
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
|
|
(l)=lt; \
|
|
(h)=ht; \
|
|
}
|
|
|
|
#define sqr64(lo,ho,in) \
|
|
{ \
|
|
BN_ULONG l,h,m; \
|
|
\
|
|
h=(in); \
|
|
l=LBITS(h); \
|
|
h=HBITS(h); \
|
|
m =(l)*(h); \
|
|
l*=l; \
|
|
h*=h; \
|
|
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
|
|
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
|
|
l=(l+m)&BN_MASK2; if (l < m) h++; \
|
|
(lo)=l; \
|
|
(ho)=h; \
|
|
}
|
|
|
|
#define mul_add(r,a,bl,bh,c) { \
|
|
BN_ULONG l,h; \
|
|
\
|
|
h= (a); \
|
|
l=LBITS(h); \
|
|
h=HBITS(h); \
|
|
mul64(l,h,(bl),(bh)); \
|
|
\
|
|
/* non-multiply part */ \
|
|
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
|
|
(c)=(r); \
|
|
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
|
|
(c)=h&BN_MASK2; \
|
|
(r)=l; \
|
|
}
|
|
|
|
#define mul(r,a,bl,bh,c) { \
|
|
BN_ULONG l,h; \
|
|
\
|
|
h= (a); \
|
|
l=LBITS(h); \
|
|
h=HBITS(h); \
|
|
mul64(l,h,(bl),(bh)); \
|
|
\
|
|
/* non-multiply part */ \
|
|
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
|
|
(c)=h&BN_MASK2; \
|
|
(r)=l&BN_MASK2; \
|
|
}
|
|
|
|
|
|
#define INIT_DATA_h0 0x67452301UL
|
|
#define INIT_DATA_h1 0xefcdab89UL
|
|
#define INIT_DATA_h2 0x98badcfeUL
|
|
#define INIT_DATA_h3 0x10325476UL
|
|
#define INIT_DATA_h4 0xc3d2e1f0UL
|
|
#define STATE_SIZE 1023
|
|
#define EXP_TABLE_SIZE 32
|
|
#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
|
|
#define BN_CTX_POOL_SIZE 16
|
|
#define MOD_EXP_CTIME_ALIGN(x_) ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
|
|
#define ENTROPY_NEEDED 32 /* require 256 bits = 32 bytes of randomness */
|
|
#define DH_FLAG_CACHE_MONT_P 0x01
|
|
#define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH
|
|
* implementation now uses constant time
|
|
* modular exponentiation for secret exponents
|
|
* by default. This flag causes the
|
|
* faster variable sliding window method to
|
|
* be used for all exponents.
|
|
*/
|
|
#define OPENSSL_DH_MAX_MODULUS_BITS 10000
|
|
|
|
struct dh_st
|
|
{
|
|
BIGNUM *p;
|
|
BIGNUM *g;
|
|
BIGNUM *pub_key; /* g^x */
|
|
BIGNUM *priv_key; /* x */
|
|
int flags;
|
|
};
|
|
|
|
struct env_md_st
|
|
{
|
|
int type;
|
|
int md_size;
|
|
unsigned long flags;
|
|
int (*init)(EVP_MD_CTX *ctx);
|
|
int (*update)(EVP_MD_CTX *ctx,const void *data,size_t count);
|
|
int (*final)(EVP_MD_CTX *ctx,unsigned char *md);
|
|
int ctx_size; /* how big does the ctx->md_data need to be */
|
|
} /* EVP_MD */;
|
|
|
|
struct env_md_ctx_st
|
|
{
|
|
const EVP_MD *digest;
|
|
void *md_data;
|
|
} /* EVP_MD_CTX */;
|
|
|
|
/* A bundle of bignums that can be linked with other bundles */
|
|
typedef struct bignum_pool_item
|
|
{
|
|
/* The bignum values */
|
|
BIGNUM vals[BN_CTX_POOL_SIZE];
|
|
/* Linked-list admin */
|
|
struct bignum_pool_item *prev, *next;
|
|
} BN_POOL_ITEM;
|
|
|
|
/* A linked-list of bignums grouped in bundles */
|
|
typedef struct bignum_pool
|
|
{
|
|
/* Linked-list admin */
|
|
BN_POOL_ITEM *head, *currentvar, *tail;
|
|
/* Stack depth and allocation size */
|
|
unsigned used, size;
|
|
} BN_POOL;
|
|
/*dh_key.h END*/
|
|
|
|
/*sha_locl.h START*/
|
|
#define SHA_LONG unsigned int
|
|
#define SHA_LBLOCK 16
|
|
#define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a contiguous array of 32 bit wide big-endian values. */
|
|
#define SHA_DIGEST_LENGTH 20
|
|
typedef struct SHAstate_st
|
|
{
|
|
SHA_LONG h0,h1,h2,h3,h4;
|
|
SHA_LONG Nl,Nh;
|
|
SHA_LONG data[SHA_LBLOCK];
|
|
unsigned int num;
|
|
} OPEN_SHA_CTX;
|
|
#define Xupdate(a,ix,ia,ib,ic,id) ((a)=(ia^ib^ic^id), ix=(a)=ROTATE((a),1))
|
|
|
|
#ifndef ROTATE
|
|
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
|
|
#endif
|
|
|
|
#define HASH_MAKE_STRING(c,s) do { \
|
|
unsigned long ll; \
|
|
ll=(c)->h0; HOST_l2c(ll,(s)); \
|
|
ll=(c)->h1; HOST_l2c(ll,(s)); \
|
|
ll=(c)->h2; HOST_l2c(ll,(s)); \
|
|
ll=(c)->h3; HOST_l2c(ll,(s)); \
|
|
ll=(c)->h4; HOST_l2c(ll,(s)); \
|
|
} while (0)
|
|
|
|
#ifndef HOST_c2l
|
|
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
|
|
l|=(((unsigned long)(*((c)++)))<<16), \
|
|
l|=(((unsigned long)(*((c)++)))<< 8), \
|
|
l|=(((unsigned long)(*((c)++))) ), \
|
|
l)
|
|
#endif
|
|
|
|
#define HOST_p_c2l(c,l,n) { \
|
|
switch (n) { \
|
|
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
|
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
|
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
case 3: l|=((unsigned long)(*((c)++))); \
|
|
} }
|
|
#define HOST_p_c2l_p(c,l,sc,len) { \
|
|
switch (sc) { \
|
|
case 0: l =((unsigned long)(*((c)++)))<<24; \
|
|
if (--len == 0) break; \
|
|
case 1: l|=((unsigned long)(*((c)++)))<<16; \
|
|
if (--len == 0) break; \
|
|
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
|
|
} }
|
|
/* NOTE the pointer is not incremented at the end of this */
|
|
#define HOST_c2l_p(c,l,n) { \
|
|
l=0; (c)+=n; \
|
|
switch (n) { \
|
|
case 3: l =((unsigned long)(*(--(c))))<< 8; \
|
|
case 2: l|=((unsigned long)(*(--(c))))<<16; \
|
|
case 1: l|=((unsigned long)(*(--(c))))<<24; \
|
|
} }
|
|
#ifndef HOST_l2c
|
|
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
|
|
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
|
|
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
|
|
*((c)++)=(unsigned char)(((l) )&0xff), \
|
|
l)
|
|
#endif
|
|
|
|
#define K_00_19 0x5a827999UL
|
|
#define K_20_39 0x6ed9eba1UL
|
|
#define K_40_59 0x8f1bbcdcUL
|
|
#define K_60_79 0xca62c1d6UL
|
|
|
|
/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
|
|
* simplified to the code in F_00_19. Wei attributes these optimisations
|
|
* to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
|
|
* #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
|
|
* I've just become aware of another tweak to be made, again from Wei Dai,
|
|
* in F_40_59, (x&a)|(y&a) -> (x|y)&a
|
|
*/
|
|
#define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))
|
|
#define F_20_39(b,c,d) ((b) ^ (c) ^ (d))
|
|
#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d)))
|
|
#define F_60_79(b,c,d) F_20_39(b,c,d)
|
|
|
|
#define BODY_00_15(xi) do { \
|
|
T=E+K_00_19+F_00_19(B,C,D); \
|
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
|
A=ROTATE(A,5)+T+xi; } while(0)
|
|
#define BODY_16_19(xa,xb,xc,xd) do { \
|
|
Xupdate(T,xa,xa,xb,xc,xd); \
|
|
T+=E+K_00_19+F_00_19(B,C,D); \
|
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
|
A=ROTATE(A,5)+T; } while(0)
|
|
#define BODY_20_39(xa,xb,xc,xd) do { \
|
|
Xupdate(T,xa,xa,xb,xc,xd); \
|
|
T+=E+K_20_39+F_20_39(B,C,D); \
|
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
|
A=ROTATE(A,5)+T; } while(0)
|
|
#define BODY_40_59(xa,xb,xc,xd) do { \
|
|
Xupdate(T,xa,xa,xb,xc,xd); \
|
|
T+=E+K_40_59+F_40_59(B,C,D); \
|
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
|
A=ROTATE(A,5)+T; } while(0)
|
|
#define BODY_60_79(xa,xb,xc,xd) do { \
|
|
Xupdate(T,xa,xa,xb,xc,xd); \
|
|
T=E+K_60_79+F_60_79(B,C,D); \
|
|
E=D, D=C, C=ROTATE(B,30), B=A; \
|
|
A=ROTATE(A,5)+T+xa; } while(0)
|
|
static void sha1_block_host_order (OPEN_SHA_CTX *c, const void *d, size_t num)
|
|
{
|
|
const SHA_LONG *W=d;
|
|
register unsigned long A,B,C,D,E,T;
|
|
int i;
|
|
SHA_LONG X[16];
|
|
|
|
A=c->h0;
|
|
B=c->h1;
|
|
C=c->h2;
|
|
D=c->h3;
|
|
E=c->h4;
|
|
|
|
for (;;)
|
|
{
|
|
for (i=0;i<16;i++)
|
|
{ X[i]=W[i]; BODY_00_15(X[i]); }
|
|
for (i=0;i<4;i++)
|
|
{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
|
|
for (;i<24;i++)
|
|
{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
|
|
for (i=0;i<20;i++)
|
|
{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
for (i=4;i<24;i++)
|
|
{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
|
|
c->h0=(c->h0+A)&0xffffffffL;
|
|
c->h1=(c->h1+B)&0xffffffffL;
|
|
c->h2=(c->h2+C)&0xffffffffL;
|
|
c->h3=(c->h3+D)&0xffffffffL;
|
|
c->h4=(c->h4+E)&0xffffffffL;
|
|
|
|
if (--num == 0) break;
|
|
|
|
A=c->h0;
|
|
B=c->h1;
|
|
C=c->h2;
|
|
D=c->h3;
|
|
E=c->h4;
|
|
|
|
W+=SHA_LBLOCK;
|
|
}
|
|
}
|
|
|
|
static void sha1_block_data_order (OPEN_SHA_CTX *c, const void *p, size_t num)
|
|
{
|
|
const unsigned char *data=p;
|
|
register unsigned long A,B,C,D,E,T,l;
|
|
int i;
|
|
SHA_LONG X[16];
|
|
|
|
A=c->h0;
|
|
B=c->h1;
|
|
C=c->h2;
|
|
D=c->h3;
|
|
E=c->h4;
|
|
|
|
for (;;)
|
|
{
|
|
for (i=0;i<16;i++)
|
|
{ HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); }
|
|
for (i=0;i<4;i++)
|
|
{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
|
|
for (;i<24;i++)
|
|
{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
|
|
for (i=0;i<20;i++)
|
|
{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
for (i=4;i<24;i++)
|
|
{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
|
|
|
|
c->h0=(c->h0+A)&0xffffffffL;
|
|
c->h1=(c->h1+B)&0xffffffffL;
|
|
c->h2=(c->h2+C)&0xffffffffL;
|
|
c->h3=(c->h3+D)&0xffffffffL;
|
|
c->h4=(c->h4+E)&0xffffffffL;
|
|
|
|
if (--num == 0) break;
|
|
|
|
A=c->h0;
|
|
B=c->h1;
|
|
C=c->h2;
|
|
D=c->h3;
|
|
E=c->h4;
|
|
|
|
}
|
|
}
|
|
/*sha_locl.h END*/
|
|
|
|
#define NID_sha1 64
|
|
|
|
static int g_state_num=0,g_state_index=0;
|
|
static unsigned char g_md[SHA_DIGEST_LENGTH];
|
|
static unsigned char g_state[STATE_SIZE+SHA_DIGEST_LENGTH];
|
|
static long g_md_count[2]={0,0};
|
|
static long g_entropy=0;
|
|
static int RAND_initialized=0;
|
|
|
|
static BIGNUM *bn_expand2(BIGNUM *b, int words);
|
|
static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c);
|
|
static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len);
|
|
static int HASH_INIT (OPEN_SHA_CTX *c);
|
|
static int RAND_poll(void);
|
|
static int RAND_init(EVP_MD_CTX *ctx)
|
|
{ return HASH_INIT(ctx->md_data); }
|
|
static int RAND_update(EVP_MD_CTX *ctx,const void *data,size_t count)
|
|
{ return HASH_UPDATE(ctx->md_data,data,count); }
|
|
static int RAND_final(EVP_MD_CTX *ctx,unsigned char *md)
|
|
{ return HASH_FINAL(md,ctx->md_data); }
|
|
static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
|
|
#ifdef BN_RECURSION
|
|
static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
|
|
int dna, int dnb, BN_ULONG *t);
|
|
static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
|
|
int tna, int tnb, BN_ULONG *t);
|
|
static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
|
#endif /* BN_RECURSION */
|
|
static int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont);
|
|
static int BN_rshift1(BIGNUM *r, const BIGNUM *a);
|
|
static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
|
|
static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
|
|
static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n);
|
|
static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n);
|
|
static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
|
|
static void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
|
|
static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
|
|
|
|
|
|
|
|
|
|
static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c)
|
|
{
|
|
register SHA_LONG *p;
|
|
register unsigned long l;
|
|
register int i,j;
|
|
static const unsigned char end[4]={0x80,0x00,0x00,0x00};
|
|
const unsigned char *cp=end;
|
|
|
|
/* c->num should definitly have room for at least one more byte. */
|
|
p=c->data;
|
|
i=c->num>>2;
|
|
j=c->num&0x03;
|
|
|
|
l = (j==0) ? 0 : p[i];
|
|
HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
|
|
|
|
if (i>(SHA_LBLOCK-2)) /* save room for Nl and Nh */
|
|
{
|
|
if (i<SHA_LBLOCK) p[i]=0;
|
|
sha1_block_host_order (c,p,1);
|
|
i=0;
|
|
}
|
|
for (; i<(SHA_LBLOCK-2); i++)
|
|
p[i]=0;
|
|
|
|
p[SHA_LBLOCK-2]=c->Nh;
|
|
p[SHA_LBLOCK-1]=c->Nl;
|
|
sha1_block_host_order (c,p,1);
|
|
|
|
#ifndef HASH_MAKE_STRING
|
|
#error "HASH_MAKE_STRING must be defined!"
|
|
#else
|
|
HASH_MAKE_STRING(c,md);
|
|
#endif
|
|
|
|
c->num=0;
|
|
return 1;
|
|
}
|
|
|
|
static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len)
|
|
{
|
|
const unsigned char *data=data_;
|
|
register SHA_LONG * p;
|
|
register SHA_LONG l;
|
|
size_t sw,sc,ew,ec;
|
|
|
|
if (len==0) return 1;
|
|
|
|
l=(c->Nl+(((SHA_LONG)len)<<3))&0xffffffffUL;
|
|
/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
|
|
* Wei Dai <weidai@eskimo.com> for pointing it out. */
|
|
if (l < c->Nl) /* overflow */
|
|
c->Nh++;
|
|
c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */
|
|
c->Nl=l;
|
|
|
|
if (c->num != 0)
|
|
{
|
|
p=c->data;
|
|
sw=c->num>>2;
|
|
sc=c->num&0x03;
|
|
|
|
if ((c->num+len) >= SHA_CBLOCK)
|
|
{
|
|
l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
|
|
for (; sw<SHA_LBLOCK; sw++)
|
|
{
|
|
HOST_c2l(data,l); p[sw]=l;
|
|
}
|
|
sha1_block_host_order(c,p,1);
|
|
len-=(SHA_CBLOCK-c->num);
|
|
c->num=0;
|
|
/* drop through and do the rest */
|
|
}
|
|
else
|
|
{
|
|
c->num+=(unsigned int)len;
|
|
if ((sc+len) < 4) /* ugly, add char's to a word */
|
|
{
|
|
l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
|
|
}
|
|
else
|
|
{
|
|
ew=(c->num>>2);
|
|
ec=(c->num&0x03);
|
|
if (sc)
|
|
l=p[sw];
|
|
HOST_p_c2l(data,l,sc);
|
|
p[sw++]=l;
|
|
for (; sw < ew; sw++)
|
|
{
|
|
HOST_c2l(data,l); p[sw]=l;
|
|
}
|
|
if (ec)
|
|
{
|
|
HOST_c2l_p(data,l,ec); p[sw]=l;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
sw=len/SHA_CBLOCK;
|
|
if (sw > 0)
|
|
{
|
|
{
|
|
sha1_block_data_order(c,data,sw);
|
|
sw*=SHA_CBLOCK;
|
|
data+=sw;
|
|
len-=sw;
|
|
}
|
|
}
|
|
|
|
if (len!=0)
|
|
{
|
|
p = c->data;
|
|
c->num = len;
|
|
ew=len>>2; /* words to copy */
|
|
ec=len&0x03;
|
|
for (; ew; ew--,p++)
|
|
{
|
|
HOST_c2l(data,l); *p=l;
|
|
}
|
|
HOST_c2l_p(data,l,ec);
|
|
*p=l;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
static int HASH_INIT (OPEN_SHA_CTX *c)
|
|
{
|
|
c->h0=INIT_DATA_h0;
|
|
c->h1=INIT_DATA_h1;
|
|
c->h2=INIT_DATA_h2;
|
|
c->h3=INIT_DATA_h3;
|
|
c->h4=INIT_DATA_h4;
|
|
c->Nl=0;
|
|
c->Nh=0;
|
|
c->num=0;
|
|
return 1;
|
|
}
|
|
|
|
static BN_POOL *BN_CTX_new(void)
|
|
{
|
|
BN_POOL *ret;
|
|
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BN_POOL));
|
|
if(!ret)
|
|
{
|
|
return NULL;
|
|
}
|
|
/* Initialise the structure */
|
|
ret->head = ret->currentvar = ret->tail = NULL;
|
|
ret->used = ret->size = 0;
|
|
return ret;
|
|
}
|
|
|
|
static const BIGNUM *BN_value_one(void)
|
|
{
|
|
static BN_ULONG data_one=1L;
|
|
static BIGNUM const_one={&data_one,1,1,BN_FLG_STATIC_DATA};
|
|
|
|
return(&const_one);
|
|
}
|
|
|
|
static int BN_num_bits_word(BN_ULONG l)
|
|
{
|
|
static const char bits[256]={
|
|
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
|
|
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
|
};
|
|
|
|
{
|
|
if (l & 0xffff0000L)
|
|
{
|
|
if (l & 0xff000000L)
|
|
return(bits[(int)(l>>24L)]+24);
|
|
else return(bits[(int)(l>>16L)]+16);
|
|
}
|
|
else
|
|
{
|
|
if (l & 0xff00L)
|
|
return(bits[(int)(l>>8)]+8);
|
|
else
|
|
return(bits[(int)(l )] );
|
|
}
|
|
}
|
|
}
|
|
|
|
static int BN_num_bits(const BIGNUM *a)
|
|
{
|
|
int i = a->top - 1;
|
|
|
|
if (BN_is_zero(a)) return 0;
|
|
return ((i*BN_BITS2) + BN_num_bits_word(a->d[i]));
|
|
}
|
|
|
|
static void BN_clear_free(BIGNUM *a)
|
|
{
|
|
int i;
|
|
|
|
if (a == NULL) return;
|
|
if (a->d != NULL)
|
|
{
|
|
if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
|
|
os_free_mem(NULL, a->d);
|
|
}
|
|
i=BN_get_flags(a,BN_FLG_MALLOCED);
|
|
if (i)
|
|
os_free_mem(NULL, a);
|
|
}
|
|
|
|
static void BN_free(BIGNUM *a)
|
|
{
|
|
if (a == NULL) return;
|
|
if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
|
|
os_free_mem(NULL, a->d);
|
|
if (a->flags & BN_FLG_MALLOCED)
|
|
os_free_mem(NULL, a);
|
|
else
|
|
{
|
|
a->d = NULL;
|
|
}
|
|
}
|
|
|
|
static void BN_init(BIGNUM *a)
|
|
{
|
|
memset(a,0,sizeof(BIGNUM));
|
|
}
|
|
|
|
static BIGNUM *BN_new(void)
|
|
{
|
|
BIGNUM *ret;
|
|
|
|
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BIGNUM));
|
|
if (ret == NULL)
|
|
{
|
|
return(NULL);
|
|
}
|
|
ret->flags=BN_FLG_MALLOCED;
|
|
ret->top=0;
|
|
ret->dmax=0;
|
|
ret->d=NULL;
|
|
return(ret);
|
|
}
|
|
|
|
/* This is used both by bn_expand2() and bn_dup_expand() */
|
|
/* The caller MUST check that words > b->dmax before calling this */
|
|
static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
|
|
{
|
|
BN_ULONG *A,*a = NULL;
|
|
const BN_ULONG *B;
|
|
int i;
|
|
|
|
if (words > (INT_MAX/(4*BN_BITS2)))
|
|
{
|
|
return NULL;
|
|
}
|
|
if (BN_get_flags(b,BN_FLG_STATIC_DATA))
|
|
{
|
|
return(NULL);
|
|
}
|
|
os_alloc_mem(NULL, (UCHAR **)&A, sizeof(BN_ULONG)*words);
|
|
a = A;
|
|
if (A == NULL)
|
|
{
|
|
return(NULL);
|
|
}
|
|
B=b->d;
|
|
/* Check if the previous number needs to be copied */
|
|
if (B != NULL)
|
|
{
|
|
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
|
|
{
|
|
/*
|
|
* The fact that the loop is unrolled
|
|
* 4-wise is a tribute to Intel. It's
|
|
* the one that doesn't have enough
|
|
* registers to accomodate more data.
|
|
* I'd unroll it 8-wise otherwise:-)
|
|
*
|
|
* <appro@fy.chalmers.se>
|
|
*/
|
|
BN_ULONG a0,a1,a2,a3;
|
|
a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
|
|
A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
|
|
}
|
|
switch (b->top&3)
|
|
{
|
|
case 3: A[2]=B[2];
|
|
case 2: A[1]=B[1];
|
|
case 1: A[0]=B[0];
|
|
case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
|
|
* the switch table by doing a=top&3; a--; goto jump_table[a];
|
|
* which fails for top== 0 */
|
|
;
|
|
}
|
|
}
|
|
|
|
return(a);
|
|
}
|
|
|
|
/* This is an internal function that should not be used in applications.
|
|
* It ensures that 'b' has enough room for a 'words' word number
|
|
* and initialises any unused part of b->d with leading zeros.
|
|
* It is mostly used by the various BIGNUM routines. If there is an error,
|
|
* NULL is returned. If not, 'b' is returned. */
|
|
|
|
static BIGNUM *bn_expand2(BIGNUM *b, int words)
|
|
{
|
|
if (words > b->dmax)
|
|
{
|
|
BN_ULONG *a = bn_expand_internal(b, words);
|
|
if(!a)
|
|
return NULL;
|
|
if(b->d)
|
|
os_free_mem(NULL, b->d);
|
|
b->d=a;
|
|
b->dmax=words;
|
|
}
|
|
|
|
return b;
|
|
}
|
|
|
|
static void BN_clear(BIGNUM *a)
|
|
{
|
|
if (a->d != NULL)
|
|
memset(a->d,0,a->dmax*sizeof(a->d[0]));
|
|
a->top=0;
|
|
}
|
|
|
|
static int BN_set_word(BIGNUM *a, BN_ULONG w)
|
|
{
|
|
if (NULL == ((1 <= (a)->dmax)?(a):bn_expand2((a),1)))
|
|
{
|
|
DHPRINT("BN_set_word NULL\n");
|
|
return(0);
|
|
}
|
|
a->d[0] = w;
|
|
a->top = (w ? 1 : 0);
|
|
return(1);
|
|
}
|
|
|
|
static BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
|
|
{
|
|
unsigned int i,m;
|
|
unsigned int n;
|
|
BN_ULONG l;
|
|
BIGNUM *bn = NULL;
|
|
|
|
if (ret == NULL)
|
|
ret = bn = BN_new();
|
|
if (ret == NULL) return(NULL);
|
|
l=0;
|
|
n=len;
|
|
if (n == 0)
|
|
{
|
|
ret->top=0;
|
|
return(ret);
|
|
}
|
|
i=((n-1)/BN_BYTES)+1;
|
|
m=((n-1)%(BN_BYTES));
|
|
if (bn_wexpand(ret, (int)i) == NULL)
|
|
{
|
|
BN_free(bn);
|
|
return NULL;
|
|
}
|
|
|
|
ret->top=i;
|
|
while (n--)
|
|
{
|
|
l=(l<<8L)| *(s++);
|
|
if (m-- == 0)
|
|
{
|
|
ret->d[--i]=l;
|
|
l=0;
|
|
m=BN_BYTES-1;
|
|
}
|
|
}
|
|
/* need to call this due to clear byte at top if avoiding
|
|
* having the top bit set (-ve number) */
|
|
bn_correct_top(ret);
|
|
return(ret);
|
|
}
|
|
|
|
static int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int i;
|
|
BN_ULONG t1,t2,*ap,*bp;
|
|
|
|
i=a->top-b->top;
|
|
if (i != 0) return(i);
|
|
ap=a->d;
|
|
bp=b->d;
|
|
for (i=a->top-1; i>=0; i--)
|
|
{
|
|
t1= ap[i];
|
|
t2= bp[i];
|
|
if (t1 != t2)
|
|
return((t1 > t2) ? 1 : -1);
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
/* ignore negative */
|
|
static int BN_bn2bin(const BIGNUM *a, unsigned char *to)
|
|
{
|
|
int n,i;
|
|
BN_ULONG l;
|
|
|
|
n=i=BN_num_bytes(a);
|
|
while (i--)
|
|
{
|
|
l=a->d[i/BN_BYTES];
|
|
*(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
|
|
}
|
|
return(n);
|
|
}
|
|
#ifdef BN_RECURSION
|
|
static int BN_cmp(const BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int i;
|
|
int gt,lt;
|
|
BN_ULONG t1,t2;
|
|
|
|
if ((a == NULL) || (b == NULL))
|
|
{
|
|
if (a != NULL)
|
|
return(-1);
|
|
else if (b != NULL)
|
|
return(1);
|
|
else
|
|
return(0);
|
|
}
|
|
|
|
{ gt=1; lt= -1; }
|
|
|
|
if (a->top > b->top) return(gt);
|
|
if (a->top < b->top) return(lt);
|
|
for (i=a->top-1; i>=0; i--)
|
|
{
|
|
t1=a->d[i];
|
|
t2=b->d[i];
|
|
if (t1 > t2) return(gt);
|
|
if (t1 < t2) return(lt);
|
|
}
|
|
return(0);
|
|
}
|
|
#endif /*BN_RECURSION */
|
|
static int BN_set_bit(BIGNUM *a, int n)
|
|
{
|
|
int i,j,k;
|
|
|
|
if (n < 0)
|
|
return 0;
|
|
|
|
i=n/BN_BITS2;
|
|
j=n%BN_BITS2;
|
|
if (a->top <= i)
|
|
{
|
|
if (bn_wexpand(a,i+1) == NULL) return(0);
|
|
for(k=a->top; k<i+1; k++)
|
|
a->d[k]=0;
|
|
a->top=i+1;
|
|
}
|
|
|
|
a->d[i]|=(((BN_ULONG)1)<<j);
|
|
return(1);
|
|
}
|
|
|
|
static int BN_is_bit_set(const BIGNUM *a, int n)
|
|
{
|
|
int i,j;
|
|
|
|
if (n < 0) return 0;
|
|
i=n/BN_BITS2;
|
|
j=n%BN_BITS2;
|
|
if (a->top <= i) return 0;
|
|
return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
|
|
}
|
|
|
|
#ifdef BN_RECURSION
|
|
static int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
|
|
{
|
|
int i;
|
|
BN_ULONG aa,bb;
|
|
|
|
aa=a[n-1];
|
|
bb=b[n-1];
|
|
if (aa != bb) return((aa > bb)?1:-1);
|
|
for (i=n-2; i>=0; i--)
|
|
{
|
|
aa=a[i];
|
|
bb=b[i];
|
|
if (aa != bb) return((aa > bb)?1:-1);
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
/* Here follows a specialised variants of bn_cmp_words(). It has the
|
|
property of performing the operation on arrays of different sizes.
|
|
The sizes of those arrays is expressed through cl, which is the
|
|
common length ( basicall, min(len(a),len(b)) ), and dl, which is the
|
|
delta between the two lengths, calculated as len(a)-len(b).
|
|
All lengths are the number of BN_ULONGs... */
|
|
static int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
|
|
int cl, int dl)
|
|
{
|
|
int n,i;
|
|
n = cl-1;
|
|
|
|
if (dl < 0)
|
|
{
|
|
for (i=dl; i<0; i++)
|
|
{
|
|
if (b[n-i] != 0)
|
|
return -1; /* a < b */
|
|
}
|
|
}
|
|
if (dl > 0)
|
|
{
|
|
for (i=dl; i>0; i--)
|
|
{
|
|
if (a[n+i] != 0)
|
|
return 1; /* a > b */
|
|
}
|
|
}
|
|
return bn_cmp_words(a,b,cl);
|
|
}
|
|
#endif /*BN_RECURSION */
|
|
static int BN_rshift1(BIGNUM *r, const BIGNUM *a)
|
|
{
|
|
BN_ULONG *ap,*rp,t,c;
|
|
int i;
|
|
|
|
if (BN_is_zero(a))
|
|
{
|
|
BN_zero(r);
|
|
return(1);
|
|
}
|
|
if (a != r)
|
|
{
|
|
if (bn_wexpand(r,a->top) == NULL) return(0);
|
|
r->top=a->top;
|
|
}
|
|
ap=a->d;
|
|
rp=r->d;
|
|
c=0;
|
|
for (i=a->top-1; i>=0; i--)
|
|
{
|
|
t=ap[i];
|
|
rp[i]=((t>>1)&BN_MASK2)|c;
|
|
c=(t&1)?BN_TBIT:0;
|
|
}
|
|
bn_correct_top(r);
|
|
return(1);
|
|
}
|
|
|
|
static int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
|
|
{
|
|
int i,nw,lb,rb;
|
|
BN_ULONG *t,*f;
|
|
BN_ULONG l;
|
|
|
|
nw=n/BN_BITS2;
|
|
if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
|
|
lb=n%BN_BITS2;
|
|
rb=BN_BITS2-lb;
|
|
f=a->d;
|
|
t=r->d;
|
|
t[a->top+nw]=0;
|
|
if (lb == 0)
|
|
for (i=a->top-1; i>=0; i--)
|
|
t[nw+i]=f[i];
|
|
else
|
|
for (i=a->top-1; i>=0; i--)
|
|
{
|
|
l=f[i];
|
|
t[nw+i+1]|=(l>>rb)&BN_MASK2;
|
|
t[nw+i]=(l<<lb)&BN_MASK2;
|
|
}
|
|
memset(t,0,nw*sizeof(t[0]));
|
|
/* for (i=0; i<nw; i++)
|
|
t[i]=0;*/
|
|
r->top=a->top+nw+1;
|
|
bn_correct_top(r);
|
|
return(1);
|
|
}
|
|
|
|
static int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
|
|
{
|
|
int i,j,nw,lb,rb;
|
|
BN_ULONG *t,*f;
|
|
BN_ULONG l,tmp;
|
|
|
|
nw=n/BN_BITS2;
|
|
rb=n%BN_BITS2;
|
|
lb=BN_BITS2-rb;
|
|
if (nw > a->top || a->top == 0)
|
|
{
|
|
BN_zero(r);
|
|
return(1);
|
|
}
|
|
if (r != a)
|
|
{
|
|
if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
|
|
}
|
|
else
|
|
{
|
|
if (n == 0)
|
|
return 1; /* or the copying loop will go berserk */
|
|
}
|
|
|
|
f= &(a->d[nw]);
|
|
t=r->d;
|
|
j=a->top-nw;
|
|
r->top=j;
|
|
|
|
if (rb == 0)
|
|
{
|
|
for (i=j; i != 0; i--)
|
|
*(t++)= *(f++);
|
|
}
|
|
else
|
|
{
|
|
l= *(f++);
|
|
for (i=j-1; i != 0; i--)
|
|
{
|
|
tmp =(l>>rb)&BN_MASK2;
|
|
l= *(f++);
|
|
*(t++) =(tmp|(l<<lb))&BN_MASK2;
|
|
}
|
|
*(t++) =(l>>rb)&BN_MASK2;
|
|
}
|
|
bn_correct_top(r);
|
|
return(1);
|
|
}
|
|
|
|
static void BN_POOL_release(BN_POOL *p, unsigned int num)
|
|
{
|
|
unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
|
|
p->used -= num;
|
|
while(num--)
|
|
{
|
|
if(!offset)
|
|
{
|
|
offset = BN_CTX_POOL_SIZE - 1;
|
|
p->currentvar = p->currentvar->prev;
|
|
}
|
|
else
|
|
offset--;
|
|
}
|
|
}
|
|
|
|
static void BN_POOL_finish(BN_POOL *p)
|
|
{
|
|
while(p->head)
|
|
{
|
|
BIGNUM *bn=p->head->vals;
|
|
unsigned int loop = 0;
|
|
while(loop++ < BN_CTX_POOL_SIZE)
|
|
{
|
|
BIGNUM *tmp = bn; /* avoid compile warning: use freed pointer */
|
|
if(bn->d) BN_clear_free(tmp);
|
|
bn++;
|
|
}
|
|
p->currentvar = p->head->next;
|
|
os_free_mem(NULL, p->head);
|
|
p->head = p->currentvar;
|
|
}
|
|
}
|
|
|
|
|
|
static BIGNUM *BN_POOL_get(BN_POOL *p)
|
|
{
|
|
BIGNUM *ret=NULL;
|
|
if(p->used == p->size)
|
|
{
|
|
BIGNUM *bn;
|
|
unsigned int loop = 0;
|
|
BN_POOL_ITEM *item;
|
|
os_alloc_mem(NULL, (UCHAR **)&item, sizeof(BN_POOL_ITEM));
|
|
if(!item) goto end;
|
|
/* Initialise the structure */
|
|
bn = item->vals;
|
|
while(loop++ < BN_CTX_POOL_SIZE)
|
|
BN_init(bn++);
|
|
item->prev = p->tail;
|
|
item->next = NULL;
|
|
/* Link it in */
|
|
if(!p->head)
|
|
p->head = p->currentvar = p->tail = item;
|
|
else
|
|
{
|
|
p->tail->next = item;
|
|
p->tail = item;
|
|
p->currentvar = item;
|
|
}
|
|
p->size += BN_CTX_POOL_SIZE;
|
|
p->used++;
|
|
/* Return the first bignum from the new pool */
|
|
ret = item->vals;
|
|
}
|
|
if(!p->used)
|
|
p->currentvar = p->head;
|
|
else if((p->used % BN_CTX_POOL_SIZE) == 0)
|
|
p->currentvar = p->currentvar->next;
|
|
ret = p->currentvar->vals + ((p->used) % BN_CTX_POOL_SIZE);
|
|
p->used++;
|
|
end:
|
|
if(ret == NULL)
|
|
{
|
|
DHPRINT("BN_POOL_get ERROR !!!");
|
|
}
|
|
else
|
|
{
|
|
/* OK, make sure the returned bignum is "zero" */
|
|
BN_zero(ret);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int i;
|
|
BN_ULONG *A;
|
|
const BN_ULONG *B;
|
|
|
|
if (0 == BN_ucmp(a,b)) return(a);
|
|
if (bn_wexpand(a,b->top) == NULL) return(NULL);
|
|
|
|
A=a->d;
|
|
B=b->d;
|
|
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
|
|
{
|
|
BN_ULONG a0,a1,a2,a3;
|
|
a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
|
|
A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
|
|
}
|
|
switch (b->top&3)
|
|
{
|
|
case 3: A[2]=B[2];
|
|
case 2: A[1]=B[1];
|
|
case 1: A[0]=B[0];
|
|
case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
|
|
}
|
|
|
|
a->top=b->top;
|
|
return(a);
|
|
}
|
|
|
|
/* Divide h,l by d and return the result. */
|
|
/* I need to test this some more :-( */
|
|
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
|
|
{
|
|
BN_ULONG dh,dl,q,ret=0,th,tl,t;
|
|
int i,count=2;
|
|
|
|
if (d == 0) return(BN_MASK2);
|
|
|
|
i=BN_num_bits_word(d);
|
|
|
|
i=BN_BITS2-i;
|
|
if (h >= d) h-=d;
|
|
|
|
if (i)
|
|
{
|
|
d<<=i;
|
|
h=(h<<i)|(l>>(BN_BITS2-i));
|
|
l<<=i;
|
|
}
|
|
dh=(d&BN_MASK2h)>>BN_BITS4;
|
|
dl=(d&BN_MASK2l);
|
|
for (;;)
|
|
{
|
|
if ((h>>BN_BITS4) == dh)
|
|
q=BN_MASK2l;
|
|
else
|
|
q=h/dh;
|
|
|
|
th=q*dh;
|
|
tl=dl*q;
|
|
for (;;)
|
|
{
|
|
t=h-th;
|
|
if ((t&BN_MASK2h) ||
|
|
((tl) <= (
|
|
(t<<BN_BITS4)|
|
|
((l&BN_MASK2h)>>BN_BITS4))))
|
|
break;
|
|
q--;
|
|
th-=dh;
|
|
tl-=dl;
|
|
}
|
|
t=(tl>>BN_BITS4);
|
|
tl=(tl<<BN_BITS4)&BN_MASK2h;
|
|
th+=t;
|
|
|
|
if (l < tl) th++;
|
|
l-=tl;
|
|
if (h < th)
|
|
{
|
|
h+=d;
|
|
q--;
|
|
}
|
|
h-=th;
|
|
|
|
if (--count == 0) break;
|
|
|
|
ret=q<<BN_BITS4;
|
|
h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
|
|
l=(l&BN_MASK2l)<<BN_BITS4;
|
|
}
|
|
ret|=q;
|
|
return(ret);
|
|
}
|
|
|
|
/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
|
|
* rm such that dv*divisor + rm = num holds.
|
|
* If 'dv' or 'rm' is NULL, the respective value is not returned.
|
|
*/
|
|
static int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
|
|
BN_POOL *ctx)
|
|
{
|
|
int release=0;
|
|
int norm_shift,i,loop;
|
|
BIGNUM *tmp,wnum,*snum,*sdiv,*res;
|
|
BN_ULONG *resp,*wnump;
|
|
BN_ULONG d0,d1;
|
|
int num_n,div_n;
|
|
|
|
if (BN_is_zero(divisor))
|
|
{
|
|
return(0);
|
|
}
|
|
|
|
if (BN_ucmp(num,divisor) < 0)
|
|
{
|
|
if (rm != NULL)
|
|
{ if (BN_copy(rm,num) == NULL) return(0); }
|
|
if (dv != NULL) BN_zero(dv);
|
|
return(1);
|
|
}
|
|
|
|
tmp=BN_POOL_get(ctx);
|
|
snum=BN_POOL_get(ctx);
|
|
sdiv=BN_POOL_get(ctx);
|
|
release+=3;
|
|
if (dv == NULL)
|
|
{res=BN_POOL_get(ctx);release++;}
|
|
else res=dv;
|
|
if (sdiv == NULL || res == NULL) goto err;
|
|
|
|
/* First we normalise the numbers */
|
|
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
|
|
if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
|
|
norm_shift+=BN_BITS2;
|
|
if (!(BN_lshift(snum,num,norm_shift))) goto err;
|
|
div_n=sdiv->top;
|
|
num_n=snum->top;
|
|
loop=num_n-div_n;
|
|
/* Lets setup a 'window' into snum
|
|
* This is the part that corresponds to the current
|
|
* 'area' being divided */
|
|
wnum.d = &(snum->d[loop]);
|
|
wnum.top = div_n;
|
|
/* only needed when BN_ucmp messes up the values between top and max */
|
|
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
|
|
|
|
/* Get the top 2 words of sdiv */
|
|
/* div_n=sdiv->top; */
|
|
d0=sdiv->d[div_n-1];
|
|
d1=(div_n == 1)?0:sdiv->d[div_n-2];
|
|
|
|
/* pointer to the 'top' of snum */
|
|
wnump= &(snum->d[num_n-1]);
|
|
|
|
/* Setup to 'res' */
|
|
if (!bn_wexpand(res,(loop+1))) goto err;
|
|
res->top=loop;
|
|
resp= &(res->d[loop-1]);
|
|
|
|
/* space for temp */
|
|
if (!bn_wexpand(tmp,(div_n+1))) goto err;
|
|
|
|
if (BN_ucmp(&wnum,sdiv) >= 0)
|
|
{
|
|
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
|
|
*resp=1;
|
|
}
|
|
else
|
|
res->top--;
|
|
/* if res->top == 0 then clear the neg value otherwise decrease
|
|
* the resp pointer */
|
|
if (res->top != 0)
|
|
resp--;
|
|
|
|
for (i=0; i<loop-1; i++, wnump--, resp--)
|
|
{
|
|
BN_ULONG q,l0;
|
|
/* the first part of the loop uses the top two words of
|
|
* snum and sdiv to calculate a BN_ULONG q such that
|
|
* | wnum - sdiv * q | < sdiv */
|
|
BN_ULONG n0,n1,rem=0;
|
|
|
|
n0=wnump[0];
|
|
n1=wnump[-1];
|
|
if (n0 == d0)
|
|
q=BN_MASK2;
|
|
else /* n0 < d0 */
|
|
{
|
|
BN_ULONG t2l,t2h,ql,qh;
|
|
|
|
q=bn_div_words(n0,n1,d0);
|
|
rem=(n1-q*d0)&BN_MASK2;
|
|
|
|
t2l=LBITS(d1); t2h=HBITS(d1);
|
|
ql =LBITS(q); qh =HBITS(q);
|
|
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
|
|
|
|
for (;;)
|
|
{
|
|
if ((t2h < rem) ||
|
|
((t2h == rem) && (t2l <= wnump[-2])))
|
|
break;
|
|
q--;
|
|
rem += d0;
|
|
if (rem < d0) break; /* don't let rem overflow */
|
|
if (t2l < d1) t2h--; t2l -= d1;
|
|
}
|
|
}
|
|
|
|
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
|
|
tmp->d[div_n]=l0;
|
|
wnum.d--;
|
|
/* ingore top values of the bignums just sub the two
|
|
* BN_ULONG arrays with bn_sub_words */
|
|
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
|
|
{
|
|
/* Note: As we have considered only the leading
|
|
* two BN_ULONGs in the calculation of q, sdiv * q
|
|
* might be greater than wnum (but then (q-1) * sdiv
|
|
* is less or equal than wnum)
|
|
*/
|
|
q--;
|
|
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
|
|
/* we can't have an overflow here (assuming
|
|
* that q != 0, but if q == 0 then tmp is
|
|
* zero anyway) */
|
|
(*wnump)++;
|
|
}
|
|
/* store part of the result */
|
|
*resp = q;
|
|
}
|
|
bn_correct_top(snum);
|
|
if (rm != NULL)
|
|
{
|
|
/* Keep a copy of the neg flag in num because if rm==num
|
|
* BN_rshift() will overwrite it.
|
|
*/
|
|
BN_rshift(rm,snum,norm_shift);
|
|
}
|
|
BN_POOL_release(ctx,release);
|
|
return(1);
|
|
err:
|
|
BN_POOL_release(ctx,release);
|
|
return(0);
|
|
}
|
|
/*bn_div END*/
|
|
|
|
/*bn_asm START*/
|
|
static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
|
|
{
|
|
BN_ULONG c=0;
|
|
BN_ULONG bl,bh;
|
|
|
|
if (num <= 0) return((BN_ULONG)0);
|
|
|
|
bl=LBITS(w);
|
|
bh=HBITS(w);
|
|
|
|
for (;;)
|
|
{
|
|
mul_add(rp[0],ap[0],bl,bh,c);
|
|
if (--num == 0) break;
|
|
mul_add(rp[1],ap[1],bl,bh,c);
|
|
if (--num == 0) break;
|
|
mul_add(rp[2],ap[2],bl,bh,c);
|
|
if (--num == 0) break;
|
|
mul_add(rp[3],ap[3],bl,bh,c);
|
|
if (--num == 0) break;
|
|
ap+=4;
|
|
rp+=4;
|
|
}
|
|
return(c);
|
|
}
|
|
|
|
static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
|
|
{
|
|
BN_ULONG carry=0;
|
|
BN_ULONG bl,bh;
|
|
|
|
if (num <= 0) return((BN_ULONG)0);
|
|
|
|
bl=LBITS(w);
|
|
bh=HBITS(w);
|
|
|
|
for (;;)
|
|
{
|
|
mul(rp[0],ap[0],bl,bh,carry);
|
|
if (--num == 0) break;
|
|
mul(rp[1],ap[1],bl,bh,carry);
|
|
if (--num == 0) break;
|
|
mul(rp[2],ap[2],bl,bh,carry);
|
|
if (--num == 0) break;
|
|
mul(rp[3],ap[3],bl,bh,carry);
|
|
if (--num == 0) break;
|
|
ap+=4;
|
|
rp+=4;
|
|
}
|
|
return(carry);
|
|
}
|
|
|
|
static void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
|
|
{
|
|
if (n <= 0) return;
|
|
for (;;)
|
|
{
|
|
sqr64(r[0],r[1],a[0]);
|
|
if (--n == 0) break;
|
|
|
|
sqr64(r[2],r[3],a[1]);
|
|
if (--n == 0) break;
|
|
|
|
sqr64(r[4],r[5],a[2]);
|
|
if (--n == 0) break;
|
|
|
|
sqr64(r[6],r[7],a[3]);
|
|
if (--n == 0) break;
|
|
|
|
a+=4;
|
|
r+=8;
|
|
}
|
|
}
|
|
|
|
static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
|
{
|
|
BN_ULONG c,l,t;
|
|
|
|
if (n <= 0) return((BN_ULONG)0);
|
|
|
|
c=0;
|
|
for (;;)
|
|
{
|
|
t=a[0];
|
|
t=(t+c)&BN_MASK2;
|
|
c=(t < c);
|
|
l=(t+b[0])&BN_MASK2;
|
|
c+=(l < t);
|
|
r[0]=l;
|
|
if (--n <= 0) break;
|
|
|
|
t=a[1];
|
|
t=(t+c)&BN_MASK2;
|
|
c=(t < c);
|
|
l=(t+b[1])&BN_MASK2;
|
|
c+=(l < t);
|
|
r[1]=l;
|
|
if (--n <= 0) break;
|
|
|
|
t=a[2];
|
|
t=(t+c)&BN_MASK2;
|
|
c=(t < c);
|
|
l=(t+b[2])&BN_MASK2;
|
|
c+=(l < t);
|
|
r[2]=l;
|
|
if (--n <= 0) break;
|
|
|
|
t=a[3];
|
|
t=(t+c)&BN_MASK2;
|
|
c=(t < c);
|
|
l=(t+b[3])&BN_MASK2;
|
|
c+=(l < t);
|
|
r[3]=l;
|
|
if (--n <= 0) break;
|
|
|
|
a+=4;
|
|
b+=4;
|
|
r+=4;
|
|
}
|
|
return((BN_ULONG)c);
|
|
}
|
|
|
|
static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
|
{
|
|
BN_ULONG t1,t2;
|
|
int c=0;
|
|
|
|
if (n <= 0) return((BN_ULONG)0);
|
|
|
|
for (;;)
|
|
{
|
|
t1=a[0]; t2=b[0];
|
|
r[0]=(t1-t2-c)&BN_MASK2;
|
|
if (t1 != t2) c=(t1 < t2);
|
|
if (--n <= 0) break;
|
|
|
|
t1=a[1]; t2=b[1];
|
|
r[1]=(t1-t2-c)&BN_MASK2;
|
|
if (t1 != t2) c=(t1 < t2);
|
|
if (--n <= 0) break;
|
|
|
|
t1=a[2]; t2=b[2];
|
|
r[2]=(t1-t2-c)&BN_MASK2;
|
|
if (t1 != t2) c=(t1 < t2);
|
|
if (--n <= 0) break;
|
|
|
|
t1=a[3]; t2=b[3];
|
|
r[3]=(t1-t2-c)&BN_MASK2;
|
|
if (t1 != t2) c=(t1 < t2);
|
|
if (--n <= 0) break;
|
|
|
|
a+=4;
|
|
b+=4;
|
|
r+=4;
|
|
}
|
|
return(c);
|
|
}
|
|
|
|
#ifdef BN_MUL_COMBA
|
|
|
|
#undef bn_mul_comba8
|
|
#undef bn_mul_comba4
|
|
#undef bn_sqr_comba8
|
|
#undef bn_sqr_comba4
|
|
|
|
/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
|
|
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
|
|
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
|
|
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
|
|
|
|
#define mul_add_c(a,b,c0,c1,c2) \
|
|
t1=LBITS(a); t2=HBITS(a); \
|
|
bl=LBITS(b); bh=HBITS(b); \
|
|
mul64(t1,t2,bl,bh); \
|
|
c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
|
|
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
|
|
|
|
#define mul_add_c2(a,b,c0,c1,c2) \
|
|
t1=LBITS(a); t2=HBITS(a); \
|
|
bl=LBITS(b); bh=HBITS(b); \
|
|
mul64(t1,t2,bl,bh); \
|
|
if (t2 & BN_TBIT) c2++; \
|
|
t2=(t2+t2)&BN_MASK2; \
|
|
if (t1 & BN_TBIT) t2++; \
|
|
t1=(t1+t1)&BN_MASK2; \
|
|
c0=(c0+t1)&BN_MASK2; \
|
|
if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
|
|
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
|
|
|
|
#define sqr_add_c(a,i,c0,c1,c2) \
|
|
sqr64(t1,t2,(a)[i]); \
|
|
c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
|
|
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
|
|
|
|
#define sqr_add_c2(a,i,j,c0,c1,c2) \
|
|
mul_add_c2((a)[i],(a)[j],c0,c1,c2)
|
|
|
|
static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
|
{
|
|
BN_ULONG bl,bh;
|
|
BN_ULONG t1,t2;
|
|
BN_ULONG c1,c2,c3;
|
|
|
|
c1=0;
|
|
c2=0;
|
|
c3=0;
|
|
mul_add_c(a[0],b[0],c1,c2,c3);
|
|
r[0]=c1;
|
|
c1=0;
|
|
mul_add_c(a[0],b[1],c2,c3,c1);
|
|
mul_add_c(a[1],b[0],c2,c3,c1);
|
|
r[1]=c2;
|
|
c2=0;
|
|
mul_add_c(a[2],b[0],c3,c1,c2);
|
|
mul_add_c(a[1],b[1],c3,c1,c2);
|
|
mul_add_c(a[0],b[2],c3,c1,c2);
|
|
r[2]=c3;
|
|
c3=0;
|
|
mul_add_c(a[0],b[3],c1,c2,c3);
|
|
mul_add_c(a[1],b[2],c1,c2,c3);
|
|
mul_add_c(a[2],b[1],c1,c2,c3);
|
|
mul_add_c(a[3],b[0],c1,c2,c3);
|
|
r[3]=c1;
|
|
c1=0;
|
|
mul_add_c(a[4],b[0],c2,c3,c1);
|
|
mul_add_c(a[3],b[1],c2,c3,c1);
|
|
mul_add_c(a[2],b[2],c2,c3,c1);
|
|
mul_add_c(a[1],b[3],c2,c3,c1);
|
|
mul_add_c(a[0],b[4],c2,c3,c1);
|
|
r[4]=c2;
|
|
c2=0;
|
|
mul_add_c(a[0],b[5],c3,c1,c2);
|
|
mul_add_c(a[1],b[4],c3,c1,c2);
|
|
mul_add_c(a[2],b[3],c3,c1,c2);
|
|
mul_add_c(a[3],b[2],c3,c1,c2);
|
|
mul_add_c(a[4],b[1],c3,c1,c2);
|
|
mul_add_c(a[5],b[0],c3,c1,c2);
|
|
r[5]=c3;
|
|
c3=0;
|
|
mul_add_c(a[6],b[0],c1,c2,c3);
|
|
mul_add_c(a[5],b[1],c1,c2,c3);
|
|
mul_add_c(a[4],b[2],c1,c2,c3);
|
|
mul_add_c(a[3],b[3],c1,c2,c3);
|
|
mul_add_c(a[2],b[4],c1,c2,c3);
|
|
mul_add_c(a[1],b[5],c1,c2,c3);
|
|
mul_add_c(a[0],b[6],c1,c2,c3);
|
|
r[6]=c1;
|
|
c1=0;
|
|
mul_add_c(a[0],b[7],c2,c3,c1);
|
|
mul_add_c(a[1],b[6],c2,c3,c1);
|
|
mul_add_c(a[2],b[5],c2,c3,c1);
|
|
mul_add_c(a[3],b[4],c2,c3,c1);
|
|
mul_add_c(a[4],b[3],c2,c3,c1);
|
|
mul_add_c(a[5],b[2],c2,c3,c1);
|
|
mul_add_c(a[6],b[1],c2,c3,c1);
|
|
mul_add_c(a[7],b[0],c2,c3,c1);
|
|
r[7]=c2;
|
|
c2=0;
|
|
mul_add_c(a[7],b[1],c3,c1,c2);
|
|
mul_add_c(a[6],b[2],c3,c1,c2);
|
|
mul_add_c(a[5],b[3],c3,c1,c2);
|
|
mul_add_c(a[4],b[4],c3,c1,c2);
|
|
mul_add_c(a[3],b[5],c3,c1,c2);
|
|
mul_add_c(a[2],b[6],c3,c1,c2);
|
|
mul_add_c(a[1],b[7],c3,c1,c2);
|
|
r[8]=c3;
|
|
c3=0;
|
|
mul_add_c(a[2],b[7],c1,c2,c3);
|
|
mul_add_c(a[3],b[6],c1,c2,c3);
|
|
mul_add_c(a[4],b[5],c1,c2,c3);
|
|
mul_add_c(a[5],b[4],c1,c2,c3);
|
|
mul_add_c(a[6],b[3],c1,c2,c3);
|
|
mul_add_c(a[7],b[2],c1,c2,c3);
|
|
r[9]=c1;
|
|
c1=0;
|
|
mul_add_c(a[7],b[3],c2,c3,c1);
|
|
mul_add_c(a[6],b[4],c2,c3,c1);
|
|
mul_add_c(a[5],b[5],c2,c3,c1);
|
|
mul_add_c(a[4],b[6],c2,c3,c1);
|
|
mul_add_c(a[3],b[7],c2,c3,c1);
|
|
r[10]=c2;
|
|
c2=0;
|
|
mul_add_c(a[4],b[7],c3,c1,c2);
|
|
mul_add_c(a[5],b[6],c3,c1,c2);
|
|
mul_add_c(a[6],b[5],c3,c1,c2);
|
|
mul_add_c(a[7],b[4],c3,c1,c2);
|
|
r[11]=c3;
|
|
c3=0;
|
|
mul_add_c(a[7],b[5],c1,c2,c3);
|
|
mul_add_c(a[6],b[6],c1,c2,c3);
|
|
mul_add_c(a[5],b[7],c1,c2,c3);
|
|
r[12]=c1;
|
|
c1=0;
|
|
mul_add_c(a[6],b[7],c2,c3,c1);
|
|
mul_add_c(a[7],b[6],c2,c3,c1);
|
|
r[13]=c2;
|
|
c2=0;
|
|
mul_add_c(a[7],b[7],c3,c1,c2);
|
|
r[14]=c3;
|
|
r[15]=c1;
|
|
}
|
|
#ifdef BN_RECURSION
|
|
static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
|
{
|
|
BN_ULONG bl,bh;
|
|
BN_ULONG t1,t2;
|
|
BN_ULONG c1,c2,c3;
|
|
|
|
c1=0;
|
|
c2=0;
|
|
c3=0;
|
|
mul_add_c(a[0],b[0],c1,c2,c3);
|
|
r[0]=c1;
|
|
c1=0;
|
|
mul_add_c(a[0],b[1],c2,c3,c1);
|
|
mul_add_c(a[1],b[0],c2,c3,c1);
|
|
r[1]=c2;
|
|
c2=0;
|
|
mul_add_c(a[2],b[0],c3,c1,c2);
|
|
mul_add_c(a[1],b[1],c3,c1,c2);
|
|
mul_add_c(a[0],b[2],c3,c1,c2);
|
|
r[2]=c3;
|
|
c3=0;
|
|
mul_add_c(a[0],b[3],c1,c2,c3);
|
|
mul_add_c(a[1],b[2],c1,c2,c3);
|
|
mul_add_c(a[2],b[1],c1,c2,c3);
|
|
mul_add_c(a[3],b[0],c1,c2,c3);
|
|
r[3]=c1;
|
|
c1=0;
|
|
mul_add_c(a[3],b[1],c2,c3,c1);
|
|
mul_add_c(a[2],b[2],c2,c3,c1);
|
|
mul_add_c(a[1],b[3],c2,c3,c1);
|
|
r[4]=c2;
|
|
c2=0;
|
|
mul_add_c(a[2],b[3],c3,c1,c2);
|
|
mul_add_c(a[3],b[2],c3,c1,c2);
|
|
r[5]=c3;
|
|
c3=0;
|
|
mul_add_c(a[3],b[3],c1,c2,c3);
|
|
r[6]=c1;
|
|
r[7]=c2;
|
|
}
|
|
#endif /*BN_RECURSION */
|
|
static void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
|
|
{
|
|
BN_ULONG bl,bh;
|
|
BN_ULONG t1,t2;
|
|
BN_ULONG c1,c2,c3;
|
|
|
|
c1=0;
|
|
c2=0;
|
|
c3=0;
|
|
sqr_add_c(a,0,c1,c2,c3);
|
|
r[0]=c1;
|
|
c1=0;
|
|
sqr_add_c2(a,1,0,c2,c3,c1);
|
|
r[1]=c2;
|
|
c2=0;
|
|
sqr_add_c(a,1,c3,c1,c2);
|
|
sqr_add_c2(a,2,0,c3,c1,c2);
|
|
r[2]=c3;
|
|
c3=0;
|
|
sqr_add_c2(a,3,0,c1,c2,c3);
|
|
sqr_add_c2(a,2,1,c1,c2,c3);
|
|
r[3]=c1;
|
|
c1=0;
|
|
sqr_add_c(a,2,c2,c3,c1);
|
|
sqr_add_c2(a,3,1,c2,c3,c1);
|
|
sqr_add_c2(a,4,0,c2,c3,c1);
|
|
r[4]=c2;
|
|
c2=0;
|
|
sqr_add_c2(a,5,0,c3,c1,c2);
|
|
sqr_add_c2(a,4,1,c3,c1,c2);
|
|
sqr_add_c2(a,3,2,c3,c1,c2);
|
|
r[5]=c3;
|
|
c3=0;
|
|
sqr_add_c(a,3,c1,c2,c3);
|
|
sqr_add_c2(a,4,2,c1,c2,c3);
|
|
sqr_add_c2(a,5,1,c1,c2,c3);
|
|
sqr_add_c2(a,6,0,c1,c2,c3);
|
|
r[6]=c1;
|
|
c1=0;
|
|
sqr_add_c2(a,7,0,c2,c3,c1);
|
|
sqr_add_c2(a,6,1,c2,c3,c1);
|
|
sqr_add_c2(a,5,2,c2,c3,c1);
|
|
sqr_add_c2(a,4,3,c2,c3,c1);
|
|
r[7]=c2;
|
|
c2=0;
|
|
sqr_add_c(a,4,c3,c1,c2);
|
|
sqr_add_c2(a,5,3,c3,c1,c2);
|
|
sqr_add_c2(a,6,2,c3,c1,c2);
|
|
sqr_add_c2(a,7,1,c3,c1,c2);
|
|
r[8]=c3;
|
|
c3=0;
|
|
sqr_add_c2(a,7,2,c1,c2,c3);
|
|
sqr_add_c2(a,6,3,c1,c2,c3);
|
|
sqr_add_c2(a,5,4,c1,c2,c3);
|
|
r[9]=c1;
|
|
c1=0;
|
|
sqr_add_c(a,5,c2,c3,c1);
|
|
sqr_add_c2(a,6,4,c2,c3,c1);
|
|
sqr_add_c2(a,7,3,c2,c3,c1);
|
|
r[10]=c2;
|
|
c2=0;
|
|
sqr_add_c2(a,7,4,c3,c1,c2);
|
|
sqr_add_c2(a,6,5,c3,c1,c2);
|
|
r[11]=c3;
|
|
c3=0;
|
|
sqr_add_c(a,6,c1,c2,c3);
|
|
sqr_add_c2(a,7,5,c1,c2,c3);
|
|
r[12]=c1;
|
|
c1=0;
|
|
sqr_add_c2(a,7,6,c2,c3,c1);
|
|
r[13]=c2;
|
|
c2=0;
|
|
sqr_add_c(a,7,c3,c1,c2);
|
|
r[14]=c3;
|
|
r[15]=c1;
|
|
}
|
|
|
|
static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
|
|
{
|
|
BN_ULONG bl,bh;
|
|
BN_ULONG t1,t2;
|
|
BN_ULONG c1,c2,c3;
|
|
|
|
c1=0;
|
|
c2=0;
|
|
c3=0;
|
|
sqr_add_c(a,0,c1,c2,c3);
|
|
r[0]=c1;
|
|
c1=0;
|
|
sqr_add_c2(a,1,0,c2,c3,c1);
|
|
r[1]=c2;
|
|
c2=0;
|
|
sqr_add_c(a,1,c3,c1,c2);
|
|
sqr_add_c2(a,2,0,c3,c1,c2);
|
|
r[2]=c3;
|
|
c3=0;
|
|
sqr_add_c2(a,3,0,c1,c2,c3);
|
|
sqr_add_c2(a,2,1,c1,c2,c3);
|
|
r[3]=c1;
|
|
c1=0;
|
|
sqr_add_c(a,2,c2,c3,c1);
|
|
sqr_add_c2(a,3,1,c2,c3,c1);
|
|
r[4]=c2;
|
|
c2=0;
|
|
sqr_add_c2(a,3,2,c3,c1,c2);
|
|
r[5]=c3;
|
|
c3=0;
|
|
sqr_add_c(a,3,c1,c2,c3);
|
|
r[6]=c1;
|
|
r[7]=c2;
|
|
}
|
|
#endif /* !BN_MUL_COMBA */
|
|
/*bn_asm END*/
|
|
|
|
/*bn_mul START*/
|
|
/* Here follows specialised variants of bn_add_words() and
|
|
bn_sub_words(). They have the property performing operations on
|
|
arrays of different sizes. The sizes of those arrays is expressed through
|
|
cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
|
|
which is the delta between the two lengths, calculated as len(a)-len(b).
|
|
All lengths are the number of BN_ULONGs... For the operations that require
|
|
a result array as parameter, it must have the length cl+abs(dl).
|
|
These functions should probably end up in bn_asm.c as soon as there are
|
|
assembler counterparts for the systems that use assembler files. */
|
|
#ifdef BN_RECURSION
|
|
static BN_ULONG bn_sub_part_words(BN_ULONG *r,
|
|
const BN_ULONG *a, const BN_ULONG *b,
|
|
int cl, int dl)
|
|
{
|
|
BN_ULONG c, t;
|
|
|
|
c = bn_sub_words(r, a, b, cl);
|
|
|
|
if (dl == 0)
|
|
return c;
|
|
|
|
r += cl;
|
|
a += cl;
|
|
b += cl;
|
|
|
|
if (dl < 0)
|
|
{
|
|
for (;;)
|
|
{
|
|
t = b[0];
|
|
r[0] = (0-t-c)&BN_MASK2;
|
|
if (t != 0) c=1;
|
|
if (++dl >= 0) break;
|
|
|
|
t = b[1];
|
|
r[1] = (0-t-c)&BN_MASK2;
|
|
if (t != 0) c=1;
|
|
if (++dl >= 0) break;
|
|
|
|
t = b[2];
|
|
r[2] = (0-t-c)&BN_MASK2;
|
|
if (t != 0) c=1;
|
|
if (++dl >= 0) break;
|
|
|
|
t = b[3];
|
|
r[3] = (0-t-c)&BN_MASK2;
|
|
if (t != 0) c=1;
|
|
if (++dl >= 0) break;
|
|
|
|
b += 4;
|
|
r += 4;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int save_dl = dl;
|
|
while(c)
|
|
{
|
|
t = a[0];
|
|
r[0] = (t-c)&BN_MASK2;
|
|
if (t != 0) c=0;
|
|
if (--dl <= 0) break;
|
|
|
|
t = a[1];
|
|
r[1] = (t-c)&BN_MASK2;
|
|
if (t != 0) c=0;
|
|
if (--dl <= 0) break;
|
|
|
|
t = a[2];
|
|
r[2] = (t-c)&BN_MASK2;
|
|
if (t != 0) c=0;
|
|
if (--dl <= 0) break;
|
|
|
|
t = a[3];
|
|
r[3] = (t-c)&BN_MASK2;
|
|
if (t != 0) c=0;
|
|
if (--dl <= 0) break;
|
|
|
|
save_dl = dl;
|
|
a += 4;
|
|
r += 4;
|
|
}
|
|
if (dl > 0)
|
|
{
|
|
if (save_dl > dl)
|
|
{
|
|
switch (save_dl - dl)
|
|
{
|
|
case 1:
|
|
r[1] = a[1];
|
|
if (--dl <= 0) break;
|
|
case 2:
|
|
r[2] = a[2];
|
|
if (--dl <= 0) break;
|
|
case 3:
|
|
r[3] = a[3];
|
|
if (--dl <= 0) break;
|
|
}
|
|
a += 4;
|
|
r += 4;
|
|
}
|
|
}
|
|
if (dl > 0)
|
|
{
|
|
for(;;)
|
|
{
|
|
r[0] = a[0];
|
|
if (--dl <= 0) break;
|
|
r[1] = a[1];
|
|
if (--dl <= 0) break;
|
|
r[2] = a[2];
|
|
if (--dl <= 0) break;
|
|
r[3] = a[3];
|
|
if (--dl <= 0) break;
|
|
|
|
a += 4;
|
|
r += 4;
|
|
}
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/* Karatsuba recursive multiplication algorithm
|
|
* (cf. Knuth, The Art of Computer Programming, Vol. 2) */
|
|
|
|
/* r is 2*n2 words in size,
|
|
* a and b are both n2 words in size.
|
|
* n2 must be a power of 2.
|
|
* We multiply and return the result.
|
|
* t must be 2*n2 words in size
|
|
* We calculate
|
|
* a[0]*b[0]
|
|
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
|
|
* a[1]*b[1]
|
|
*/
|
|
static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
|
|
int dna, int dnb, BN_ULONG *t)
|
|
{
|
|
int n=n2/2,c1,c2;
|
|
int tna=n+dna, tnb=n+dnb;
|
|
unsigned int neg,zero;
|
|
BN_ULONG ln,lo,*p;
|
|
|
|
# ifdef BN_MUL_COMBA
|
|
/* Only call bn_mul_comba 8 if n2 == 8 and the
|
|
* two arrays are complete [steve]
|
|
*/
|
|
if (n2 == 8 && dna == 0 && dnb == 0)
|
|
{
|
|
bn_mul_comba8(r,a,b);
|
|
return;
|
|
}
|
|
# endif /* BN_MUL_COMBA */
|
|
/* Else do normal multiply */
|
|
if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
|
|
{
|
|
bn_mul_normal(r,a,n2+dna,b,n2+dnb);
|
|
if ((dna + dnb) < 0)
|
|
memset(&r[2*n2 + dna + dnb], 0,
|
|
sizeof(BN_ULONG) * -(dna + dnb));
|
|
return;
|
|
}
|
|
/* r=(a[0]-a[1])*(b[1]-b[0]) */
|
|
c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
|
|
c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
|
|
zero=neg=0;
|
|
switch (c1*3+c2)
|
|
{
|
|
case -4:
|
|
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
|
|
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
|
|
break;
|
|
case -3:
|
|
zero=1;
|
|
break;
|
|
case -2:
|
|
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
|
|
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
|
|
neg=1;
|
|
break;
|
|
case -1:
|
|
case 0:
|
|
case 1:
|
|
zero=1;
|
|
break;
|
|
case 2:
|
|
bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
|
|
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
|
|
neg=1;
|
|
break;
|
|
case 3:
|
|
zero=1;
|
|
break;
|
|
case 4:
|
|
bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
|
|
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
|
|
break;
|
|
}
|
|
|
|
# ifdef BN_MUL_COMBA
|
|
if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
|
|
extra args to do this well */
|
|
{
|
|
if (!zero)
|
|
bn_mul_comba4(&(t[n2]),t,&(t[n]));
|
|
else
|
|
memset(&(t[n2]),0,8*sizeof(BN_ULONG));
|
|
|
|
bn_mul_comba4(r,a,b);
|
|
bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
|
|
}
|
|
else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
|
|
take extra args to do this
|
|
well */
|
|
{
|
|
if (!zero)
|
|
bn_mul_comba8(&(t[n2]),t,&(t[n]));
|
|
else
|
|
memset(&(t[n2]),0,16*sizeof(BN_ULONG));
|
|
|
|
bn_mul_comba8(r,a,b);
|
|
bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
|
|
}
|
|
else
|
|
# endif /* BN_MUL_COMBA */
|
|
{
|
|
p= &(t[n2*2]);
|
|
if (!zero)
|
|
bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
|
|
else
|
|
memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
|
|
bn_mul_recursive(r,a,b,n,0,0,p);
|
|
bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
|
|
}
|
|
|
|
/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
|
|
* r[10] holds (a[0]*b[0])
|
|
* r[32] holds (b[1]*b[1])
|
|
*/
|
|
|
|
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
|
|
|
|
if (neg) /* if t[32] is negative */
|
|
{
|
|
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
|
|
}
|
|
else
|
|
{
|
|
/* Might have a carry */
|
|
c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
|
|
}
|
|
|
|
/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
|
|
* r[10] holds (a[0]*b[0])
|
|
* r[32] holds (b[1]*b[1])
|
|
* c1 holds the carry bits
|
|
*/
|
|
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
|
|
if (c1)
|
|
{
|
|
p= &(r[n+n2]);
|
|
lo= *p;
|
|
ln=(lo+c1)&BN_MASK2;
|
|
*p=ln;
|
|
|
|
/* The overflow will stop before we over write
|
|
* words we should not overwrite */
|
|
if (ln < (BN_ULONG)c1)
|
|
{
|
|
do {
|
|
p++;
|
|
lo= *p;
|
|
ln=(lo+1)&BN_MASK2;
|
|
*p=ln;
|
|
} while (ln == 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* n+tn is the word length
|
|
* t needs to be n*4 is size, as does r */
|
|
static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
|
|
int tna, int tnb, BN_ULONG *t)
|
|
{
|
|
int i,j,n2=n*2;
|
|
int c1,c2,neg,zero;
|
|
BN_ULONG ln,lo,*p;
|
|
|
|
if (n < 8)
|
|
{
|
|
bn_mul_normal(r,a,n+tna,b,n+tnb);
|
|
return;
|
|
}
|
|
|
|
/* r=(a[0]-a[1])*(b[1]-b[0]) */
|
|
c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
|
|
c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
|
|
zero=neg=0;
|
|
switch (c1*3+c2)
|
|
{
|
|
case -4:
|
|
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
|
|
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
|
|
break;
|
|
case -3:
|
|
zero=1;
|
|
/* break; */
|
|
case -2:
|
|
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
|
|
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
|
|
neg=1;
|
|
break;
|
|
case -1:
|
|
case 0:
|
|
case 1:
|
|
zero=1;
|
|
/* break; */
|
|
case 2:
|
|
bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
|
|
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
|
|
neg=1;
|
|
break;
|
|
case 3:
|
|
zero=1;
|
|
/* break; */
|
|
case 4:
|
|
bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
|
|
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
|
|
break;
|
|
}
|
|
if (n == 8)
|
|
{
|
|
bn_mul_comba8(&(t[n2]),t,&(t[n]));
|
|
bn_mul_comba8(r,a,b);
|
|
bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
|
|
memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
|
|
}
|
|
else
|
|
{
|
|
p= &(t[n2*2]);
|
|
bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
|
|
bn_mul_recursive(r,a,b,n,0,0,p);
|
|
i=n/2;
|
|
/* If there is only a bottom half to the number,
|
|
* just do it */
|
|
if (tna > tnb)
|
|
j = tna - i;
|
|
else
|
|
j = tnb - i;
|
|
if (j == 0)
|
|
{
|
|
bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
|
|
i,tna-i,tnb-i,p);
|
|
memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
|
|
}
|
|
else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
|
|
{
|
|
bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
|
|
i,tna-i,tnb-i,p);
|
|
memset(&(r[n2+tna+tnb]),0,
|
|
sizeof(BN_ULONG)*(n2-tna-tnb));
|
|
}
|
|
else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
|
|
{
|
|
memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
|
|
if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
|
|
&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
|
|
{
|
|
bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
|
|
}
|
|
else
|
|
{
|
|
for (;;)
|
|
{
|
|
i/=2;
|
|
if (i < tna && i < tnb)
|
|
{
|
|
bn_mul_part_recursive(&(r[n2]),
|
|
&(a[n]),&(b[n]),
|
|
i,tna-i,tnb-i,p);
|
|
break;
|
|
}
|
|
else if (i <= tna && i <= tnb)
|
|
{
|
|
bn_mul_recursive(&(r[n2]),
|
|
&(a[n]),&(b[n]),
|
|
i,tna-i,tnb-i,p);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
|
|
* r[10] holds (a[0]*b[0])
|
|
* r[32] holds (b[1]*b[1])
|
|
*/
|
|
|
|
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
|
|
|
|
if (neg) /* if t[32] is negative */
|
|
{
|
|
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
|
|
}
|
|
else
|
|
{
|
|
/* Might have a carry */
|
|
c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
|
|
}
|
|
|
|
/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
|
|
* r[10] holds (a[0]*b[0])
|
|
* r[32] holds (b[1]*b[1])
|
|
* c1 holds the carry bits
|
|
*/
|
|
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
|
|
if (c1)
|
|
{
|
|
p= &(r[n+n2]);
|
|
lo= *p;
|
|
ln=(lo+c1)&BN_MASK2;
|
|
*p=ln;
|
|
|
|
/* The overflow will stop before we over write
|
|
* words we should not overwrite */
|
|
if (ln < (BN_ULONG)c1)
|
|
{
|
|
do {
|
|
p++;
|
|
lo= *p;
|
|
ln=(lo+1)&BN_MASK2;
|
|
*p=ln;
|
|
} while (ln == 0);
|
|
}
|
|
}
|
|
}
|
|
#endif /* BN_RECURSION */
|
|
|
|
static int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_POOL *ctx)
|
|
{
|
|
int release=0;
|
|
int ret=0;
|
|
int top,al,bl;
|
|
BIGNUM *rr;
|
|
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
|
|
int i;
|
|
#endif
|
|
#ifdef BN_RECURSION
|
|
BIGNUM *t=NULL;
|
|
int j=0,k;
|
|
#endif
|
|
|
|
al=a->top;
|
|
bl=b->top;
|
|
|
|
if ((al == 0) || (bl == 0))
|
|
{
|
|
BN_zero(r);
|
|
return(1);
|
|
}
|
|
top=al+bl;
|
|
|
|
if ((r == a) || (r == b))
|
|
{
|
|
if ((rr = BN_POOL_get(ctx)) == NULL) goto err;
|
|
release++;
|
|
}
|
|
else
|
|
rr = r;
|
|
|
|
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
|
|
i = al-bl;
|
|
#endif
|
|
#ifdef BN_MUL_COMBA
|
|
if (i == 0)
|
|
{
|
|
if (al == 8)
|
|
{
|
|
if (bn_wexpand(rr,16) == NULL) goto err;
|
|
rr->top=16;
|
|
bn_mul_comba8(rr->d,a->d,b->d);
|
|
goto end;
|
|
}
|
|
}
|
|
#endif /* BN_MUL_COMBA */
|
|
#ifdef BN_RECURSION
|
|
if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
|
|
{
|
|
if (i >= -1 && i <= 1)
|
|
{
|
|
int sav_j =0;
|
|
/* Find out the power of two lower or equal
|
|
to the longest of the two numbers */
|
|
if (i >= 0)
|
|
{
|
|
j = BN_num_bits_word((BN_ULONG)al);
|
|
}
|
|
if (i == -1)
|
|
{
|
|
j = BN_num_bits_word((BN_ULONG)bl);
|
|
}
|
|
sav_j = j;
|
|
j = 1<<(j-1);
|
|
k = j+j;
|
|
t = BN_POOL_get(ctx);
|
|
release++;
|
|
if (al > j || bl > j)
|
|
{
|
|
bn_wexpand(t,k*4);
|
|
bn_wexpand(rr,k*4);
|
|
bn_mul_part_recursive(rr->d,a->d,b->d,
|
|
j,al-j,bl-j,t->d);
|
|
}
|
|
else /* al <= j || bl <= j */
|
|
{
|
|
bn_wexpand(t,k*2);
|
|
bn_wexpand(rr,k*2);
|
|
bn_mul_recursive(rr->d,a->d,b->d,
|
|
j,al-j,bl-j,t->d);
|
|
}
|
|
rr->top=top;
|
|
goto end;
|
|
}
|
|
}
|
|
#endif /* BN_RECURSION */
|
|
if (bn_wexpand(rr,top) == NULL) goto err;
|
|
rr->top=top;
|
|
bn_mul_normal(rr->d,a->d,al,b->d,bl);
|
|
|
|
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
|
|
end:
|
|
#endif
|
|
bn_correct_top(rr);
|
|
if (r != rr) BN_copy(r,rr);
|
|
ret=1;
|
|
err:
|
|
BN_POOL_release(ctx,release);
|
|
return(ret);
|
|
}
|
|
|
|
static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
|
|
{
|
|
BN_ULONG *rr;
|
|
|
|
if (na < nb)
|
|
{
|
|
int itmp;
|
|
BN_ULONG *ltmp;
|
|
|
|
itmp=na; na=nb; nb=itmp;
|
|
ltmp=a; a=b; b=ltmp;
|
|
|
|
}
|
|
rr= &(r[na]);
|
|
if (nb <= 0)
|
|
{
|
|
(void)bn_mul_words(r,a,na,0);
|
|
return;
|
|
}
|
|
else
|
|
rr[0]=bn_mul_words(r,a,na,b[0]);
|
|
|
|
for (;;)
|
|
{
|
|
if (--nb <= 0) return;
|
|
rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
|
|
if (--nb <= 0) return;
|
|
rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
|
|
if (--nb <= 0) return;
|
|
rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
|
|
if (--nb <= 0) return;
|
|
rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
|
|
rr+=4;
|
|
r+=4;
|
|
b+=4;
|
|
}
|
|
}
|
|
/*bn_mul END*/
|
|
|
|
/* tmp must have 2*n words */
|
|
static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
|
|
{
|
|
int i,j,max;
|
|
const BN_ULONG *ap;
|
|
BN_ULONG *rp;
|
|
|
|
max=n*2;
|
|
ap=a;
|
|
rp=r;
|
|
rp[0]=rp[max-1]=0;
|
|
rp++;
|
|
j=n;
|
|
|
|
if (--j > 0)
|
|
{
|
|
ap++;
|
|
rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
|
|
rp+=2;
|
|
}
|
|
|
|
for (i=n-2; i>0; i--)
|
|
{
|
|
j--;
|
|
ap++;
|
|
rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
|
|
rp+=2;
|
|
}
|
|
|
|
bn_add_words(r,r,r,max);
|
|
|
|
/* There will not be a carry */
|
|
|
|
bn_sqr_words(tmp,a,n);
|
|
|
|
bn_add_words(r,r,tmp,max);
|
|
}
|
|
|
|
#ifdef BN_RECURSION
|
|
/* r is 2*n words in size,
|
|
* a and b are both n words in size. (There's not actually a 'b' here ...)
|
|
* n must be a power of 2.
|
|
* We multiply and return the result.
|
|
* t must be 2*n words in size
|
|
* We calculate
|
|
* a[0]*b[0]
|
|
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
|
|
* a[1]*b[1]
|
|
*/
|
|
static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
|
|
{
|
|
int n=n2/2;
|
|
int zero,c1;
|
|
BN_ULONG ln,lo,*p;
|
|
|
|
if (n2 == 4)
|
|
{
|
|
#ifndef BN_SQR_COMBA
|
|
bn_sqr_normal(r,a,4,t);
|
|
#else
|
|
bn_sqr_comba4(r,a);
|
|
#endif
|
|
return;
|
|
}
|
|
else if (n2 == 8)
|
|
{
|
|
#ifndef BN_SQR_COMBA
|
|
bn_sqr_normal(r,a,8,t);
|
|
#else
|
|
bn_sqr_comba8(r,a);
|
|
#endif
|
|
return;
|
|
}
|
|
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
|
|
{
|
|
bn_sqr_normal(r,a,n2,t);
|
|
return;
|
|
}
|
|
/* r=(a[0]-a[1])*(a[1]-a[0]) */
|
|
c1=bn_cmp_words(a,&(a[n]),n);
|
|
zero=0;
|
|
if (c1 > 0)
|
|
bn_sub_words(t,a,&(a[n]),n);
|
|
else if (c1 < 0)
|
|
bn_sub_words(t,&(a[n]),a,n);
|
|
else
|
|
zero=1;
|
|
|
|
/* The result will always be negative unless it is zero */
|
|
p= &(t[n2*2]);
|
|
|
|
if (!zero)
|
|
bn_sqr_recursive(&(t[n2]),t,n,p);
|
|
else
|
|
memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
|
|
bn_sqr_recursive(r,a,n,p);
|
|
bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
|
|
|
|
/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
|
|
* r[10] holds (a[0]*b[0])
|
|
* r[32] holds (b[1]*b[1])
|
|
*/
|
|
|
|
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
|
|
|
|
/* t[32] is negative */
|
|
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
|
|
|
|
/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
|
|
* r[10] holds (a[0]*a[0])
|
|
* r[32] holds (a[1]*a[1])
|
|
* c1 holds the carry bits
|
|
*/
|
|
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
|
|
if (c1)
|
|
{
|
|
p= &(r[n+n2]);
|
|
lo= *p;
|
|
ln=(lo+c1)&BN_MASK2;
|
|
*p=ln;
|
|
|
|
/* The overflow will stop before we over write
|
|
* words we should not overwrite */
|
|
if (ln < (BN_ULONG)c1)
|
|
{
|
|
do {
|
|
p++;
|
|
lo= *p;
|
|
ln=(lo+1)&BN_MASK2;
|
|
*p=ln;
|
|
} while (ln == 0);
|
|
}
|
|
}
|
|
}
|
|
#endif /*BN_RECURSION */
|
|
|
|
/* unsigned subtraction of b from a, a must be larger than b. */
|
|
static int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int max,min,dif;
|
|
register BN_ULONG t1,t2,*ap,*bp,*rp;
|
|
int i,carry;
|
|
|
|
max = a->top;
|
|
min = b->top;
|
|
dif = max - min;
|
|
|
|
if (dif < 0) /* hmm... should not be happening */
|
|
{
|
|
return(0);
|
|
}
|
|
|
|
if (bn_wexpand(r,max) == NULL) return(0);
|
|
|
|
ap=a->d;
|
|
bp=b->d;
|
|
rp=r->d;
|
|
|
|
carry=0;
|
|
for (i = min; i != 0; i--)
|
|
{
|
|
t1= *(ap++);
|
|
t2= *(bp++);
|
|
if (carry)
|
|
{
|
|
carry=(t1 <= t2);
|
|
t1=(t1-t2-1)&BN_MASK2;
|
|
}
|
|
else
|
|
{
|
|
carry=(t1 < t2);
|
|
t1=(t1-t2)&BN_MASK2;
|
|
}
|
|
*(rp++)=t1&BN_MASK2;
|
|
}
|
|
if (carry) /* subtracted */
|
|
{
|
|
if (!dif)
|
|
/* error: a < b */
|
|
return 0;
|
|
while (dif)
|
|
{
|
|
dif--;
|
|
t1 = *(ap++);
|
|
t2 = (t1-1)&BN_MASK2;
|
|
*(rp++) = t2;
|
|
if (t1)
|
|
break;
|
|
}
|
|
}
|
|
if (rp != ap)
|
|
{
|
|
for (;;)
|
|
{
|
|
if (!dif--) break;
|
|
rp[0]=ap[0];
|
|
if (!dif--) break;
|
|
rp[1]=ap[1];
|
|
if (!dif--) break;
|
|
rp[2]=ap[2];
|
|
if (!dif--) break;
|
|
rp[3]=ap[3];
|
|
rp+=4;
|
|
ap+=4;
|
|
}
|
|
}
|
|
|
|
r->top=max;
|
|
bn_correct_top(r);
|
|
return(1);
|
|
}
|
|
|
|
static int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_POOL *ctx)
|
|
{
|
|
int retn=0;
|
|
|
|
BIGNUM *n,*r;
|
|
BN_ULONG *ap,*np,*rp,n0,v,*nrp;
|
|
int al,nl,max,i,x,ri;
|
|
|
|
if ((r = BN_POOL_get(ctx)) == NULL) goto err;
|
|
|
|
if (!BN_copy(r,a)) goto err;
|
|
n= &(mont->N);
|
|
|
|
ap=a->d;
|
|
/* mont->ri is the size of mont->N in bits (rounded up
|
|
to the word size) */
|
|
al=ri=mont->ri/BN_BITS2;
|
|
|
|
nl=n->top;
|
|
if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
|
|
|
|
max=(nl+al+1); /* allow for overflow (no?) XXX */
|
|
if (bn_wexpand(r,max) == NULL) goto err;
|
|
if (bn_wexpand(ret,max) == NULL) goto err;
|
|
|
|
np=n->d;
|
|
rp=r->d;
|
|
nrp= &(r->d[nl]);
|
|
|
|
/* clear the top words of T */
|
|
for (i=r->top; i<max; i++) /* memset? XXX */
|
|
r->d[i]=0;
|
|
|
|
r->top=max;
|
|
n0=mont->n0;
|
|
|
|
for (i=0; i<nl; i++)
|
|
{
|
|
v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
|
|
nrp++;
|
|
rp++;
|
|
if (((nrp[-1]+=v)&BN_MASK2) >= v)
|
|
continue;
|
|
else
|
|
{
|
|
if (((++nrp[0])&BN_MASK2) != 0) continue;
|
|
if (((++nrp[1])&BN_MASK2) != 0) continue;
|
|
for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
|
|
}
|
|
}
|
|
bn_correct_top(r);
|
|
|
|
/* mont->ri will be a multiple of the word size */
|
|
x=ri;
|
|
rp=ret->d;
|
|
ap= &(r->d[x]);
|
|
if (r->top < x)
|
|
al=0;
|
|
else
|
|
al=r->top-x;
|
|
ret->top=al;
|
|
al-=4;
|
|
for (i=0; i<al; i+=4)
|
|
{
|
|
BN_ULONG t1,t2,t3,t4;
|
|
|
|
t1=ap[i+0];
|
|
t2=ap[i+1];
|
|
t3=ap[i+2];
|
|
t4=ap[i+3];
|
|
rp[i+0]=t1;
|
|
rp[i+1]=t2;
|
|
rp[i+2]=t3;
|
|
rp[i+3]=t4;
|
|
}
|
|
al+=4;
|
|
for (; i<al; i++)
|
|
rp[i]=ap[i];
|
|
|
|
if (BN_ucmp(ret, &(mont->N)) >= 0)
|
|
{
|
|
if (!BN_usub(ret,ret,&(mont->N))) goto err;
|
|
}
|
|
retn=1;
|
|
err:
|
|
BN_POOL_release(ctx,1);
|
|
return(retn);
|
|
}
|
|
|
|
/*SQR*/
|
|
/* r must not be a */
|
|
/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
|
|
static int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_POOL *ctx)
|
|
{
|
|
int release=0;
|
|
int max,al;
|
|
int ret = 0;
|
|
BIGNUM *tmp,*rr;
|
|
|
|
al=a->top;
|
|
if (al <= 0)
|
|
{
|
|
r->top=0;
|
|
return 1;
|
|
}
|
|
|
|
if(a != r)
|
|
rr=r;
|
|
else
|
|
{rr=BN_POOL_get(ctx);release++;}
|
|
tmp=BN_POOL_get(ctx);
|
|
release++;
|
|
if (!rr || !tmp) goto err;
|
|
|
|
max = 2 * al; /* Non-zero (from above) */
|
|
if (bn_wexpand(rr,max) == NULL) goto err;
|
|
|
|
if (al == 4)
|
|
{
|
|
#ifndef BN_SQR_COMBA
|
|
BN_ULONG t[8];
|
|
bn_sqr_normal(rr->d,a->d,4,t);
|
|
#else
|
|
bn_sqr_comba4(rr->d,a->d);
|
|
#endif
|
|
}
|
|
else if (al == 8)
|
|
{
|
|
#ifndef BN_SQR_COMBA
|
|
BN_ULONG t[16];
|
|
bn_sqr_normal(rr->d,a->d,8,t);
|
|
#else
|
|
bn_sqr_comba8(rr->d,a->d);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
#if defined(BN_RECURSION)
|
|
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
|
|
{
|
|
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
|
|
bn_sqr_normal(rr->d,a->d,al,t);
|
|
}
|
|
else
|
|
{
|
|
int j,k;
|
|
|
|
j=BN_num_bits_word((BN_ULONG)al);
|
|
j=1<<(j-1);
|
|
k=j+j;
|
|
if (al == j)
|
|
{
|
|
if (bn_wexpand(tmp,k*2) == NULL) goto err;
|
|
bn_sqr_recursive(rr->d,a->d,al,tmp->d);
|
|
}
|
|
else
|
|
{
|
|
if (bn_wexpand(tmp,max) == NULL) goto err;
|
|
bn_sqr_normal(rr->d,a->d,al,tmp->d);
|
|
}
|
|
}
|
|
#else
|
|
if (bn_wexpand(tmp,max) == NULL) goto err;
|
|
bn_sqr_normal(rr->d,a->d,al,tmp->d);
|
|
#endif
|
|
}
|
|
|
|
/* If the most-significant half of the top word of 'a' is zero, then
|
|
* the square of 'a' will max-1 words. */
|
|
if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
|
|
rr->top = max - 1;
|
|
else
|
|
rr->top = max;
|
|
if (rr != r) BN_copy(r,rr);
|
|
ret = 1;
|
|
err:
|
|
BN_POOL_release(ctx,release);
|
|
return(ret);
|
|
}
|
|
|
|
static int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_POOL *ctx)
|
|
{
|
|
BIGNUM *tmp;
|
|
int ret=0;
|
|
|
|
tmp = BN_POOL_get(ctx);
|
|
if (tmp == NULL) goto err;
|
|
|
|
if (0 == BN_ucmp(a,b))
|
|
{
|
|
if (!BN_sqr(tmp,a,ctx)) goto err;
|
|
}
|
|
else
|
|
{
|
|
if (!BN_mul(tmp,a,b,ctx)) goto err;
|
|
}
|
|
/* reduce from aRR to aR */
|
|
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
|
|
ret=1;
|
|
err:
|
|
BN_POOL_release(ctx,1);
|
|
return(ret);
|
|
}
|
|
|
|
/* unsigned add of b to a */
|
|
static int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int max,min,dif;
|
|
BN_ULONG *ap,*bp,*rp,carry,t1,t2;
|
|
const BIGNUM *tmp;
|
|
|
|
if (a->top < b->top)
|
|
{ tmp=a; a=b; b=tmp; }
|
|
max = a->top;
|
|
min = b->top;
|
|
dif = max - min;
|
|
|
|
if (bn_wexpand(r,max+1) == NULL)
|
|
return 0;
|
|
|
|
r->top=max;
|
|
|
|
ap=a->d;
|
|
bp=b->d;
|
|
rp=r->d;
|
|
|
|
carry=bn_add_words(rp,ap,bp,min);
|
|
rp+=min;
|
|
ap+=min;
|
|
bp+=min;
|
|
|
|
if (carry)
|
|
{
|
|
while (dif)
|
|
{
|
|
dif--;
|
|
t1 = *(ap++);
|
|
t2 = (t1+1) & BN_MASK2;
|
|
*(rp++) = t2;
|
|
if (t2)
|
|
{
|
|
carry=0;
|
|
break;
|
|
}
|
|
}
|
|
if (carry)
|
|
{
|
|
/* carry != 0 => dif == 0 */
|
|
*rp = 1;
|
|
r->top++;
|
|
}
|
|
}
|
|
if (dif && rp != ap)
|
|
while (dif--)
|
|
/* copy remaining words if ap != rp */
|
|
*(rp++) = *(ap++);
|
|
return 1;
|
|
}
|
|
|
|
static int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
|
|
{
|
|
int max;
|
|
|
|
/* We are actually doing a - b :-) */
|
|
|
|
max=(a->top > b->top)?a->top:b->top;
|
|
if (bn_wexpand(r,max) == NULL) return(0);
|
|
if (BN_ucmp(a,b) < 0)
|
|
{
|
|
if (!BN_usub(r,b,a)) return(0);
|
|
DHPRINT("----\n");
|
|
}
|
|
else
|
|
{
|
|
if (!BN_usub(r,a,b)) return(0);
|
|
}
|
|
return(1);
|
|
}
|
|
|
|
static int BN_mul_word(BIGNUM *a, BN_ULONG w)
|
|
{
|
|
BN_ULONG ll;
|
|
|
|
w&=BN_MASK2;
|
|
if (a->top)
|
|
{
|
|
if (w == 0)
|
|
BN_zero(a);
|
|
else
|
|
{
|
|
ll=bn_mul_words(a->d,a->d,a->top,w);
|
|
if (ll)
|
|
{
|
|
if (bn_wexpand(a,a->top+1) == NULL) return(0);
|
|
a->d[a->top++]=ll;
|
|
}
|
|
}
|
|
}
|
|
return(1);
|
|
}
|
|
|
|
/* solves ax == 1 (mod n) */
|
|
static BIGNUM *BN_mod_inverse(BIGNUM *in,
|
|
const BIGNUM *a, const BIGNUM *n, BN_POOL *ctx)
|
|
{
|
|
BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
|
|
BIGNUM *ret=NULL;
|
|
int sign;
|
|
|
|
A = BN_POOL_get(ctx);
|
|
B = BN_POOL_get(ctx);
|
|
X = BN_POOL_get(ctx);
|
|
D = BN_POOL_get(ctx);
|
|
M = BN_POOL_get(ctx);
|
|
Y = BN_POOL_get(ctx);
|
|
T = BN_POOL_get(ctx);
|
|
if (T == NULL) goto err;
|
|
|
|
if (in == NULL)
|
|
R=BN_new();
|
|
else
|
|
R=in;
|
|
if (R == NULL) goto err;
|
|
|
|
BN_one(X);
|
|
BN_zero(Y);
|
|
if (BN_copy(B,a) == NULL) goto err;
|
|
if (BN_copy(A,n) == NULL) goto err;
|
|
if (BN_ucmp(B, A) >= 0)
|
|
{
|
|
if (!(BN_mod(B, B, A, ctx))) goto err;
|
|
}
|
|
sign = -1;
|
|
/* From B = a mod |n|, A = |n| it follows that
|
|
*
|
|
* 0 <= B < A,
|
|
* -sign*X*a == B (mod |n|),
|
|
* sign*Y*a == A (mod |n|).
|
|
*/
|
|
|
|
if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
|
|
{
|
|
/* Binary inversion algorithm; requires odd modulus.
|
|
* This is faster than the general algorithm if the modulus
|
|
* is sufficiently small (about 400 .. 500 bits on 32-bit
|
|
* sytems, but much more on 64-bit systems) */
|
|
int shift;
|
|
|
|
while (!BN_is_zero(B))
|
|
{
|
|
/*
|
|
* 0 < B < |n|,
|
|
* 0 < A <= |n|,
|
|
* (1) -sign*X*a == B (mod |n|),
|
|
* (2) sign*Y*a == A (mod |n|)
|
|
*/
|
|
|
|
/* Now divide B by the maximum possible power of two in the integers,
|
|
* and divide X by the same value mod |n|.
|
|
* When we're done, (1) still holds. */
|
|
shift = 0;
|
|
while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
|
|
{
|
|
shift++;
|
|
|
|
if (BN_is_odd(X))
|
|
{
|
|
if (!BN_uadd(X, X, n)) goto err;
|
|
}
|
|
/* now X is even, so we can easily divide it by two */
|
|
if (!BN_rshift1(X, X)) goto err;
|
|
}
|
|
if (shift > 0)
|
|
{
|
|
if (!BN_rshift(B, B, shift)) goto err;
|
|
}
|
|
|
|
|
|
/* Same for A and Y. Afterwards, (2) still holds. */
|
|
shift = 0;
|
|
while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
|
|
{
|
|
shift++;
|
|
|
|
if (BN_is_odd(Y))
|
|
{
|
|
if (!BN_uadd(Y, Y, n)) goto err;
|
|
}
|
|
/* now Y is even */
|
|
if (!BN_rshift1(Y, Y)) goto err;
|
|
}
|
|
if (shift > 0)
|
|
{
|
|
if (!BN_rshift(A, A, shift)) goto err;
|
|
}
|
|
|
|
|
|
/* We still have (1) and (2).
|
|
* Both A and B are odd.
|
|
* The following computations ensure that
|
|
*
|
|
* 0 <= B < |n|,
|
|
* 0 < A < |n|,
|
|
* (1) -sign*X*a == B (mod |n|),
|
|
* (2) sign*Y*a == A (mod |n|),
|
|
*
|
|
* and that either A or B is even in the next iteration.
|
|
*/
|
|
if (BN_ucmp(B, A) >= 0)
|
|
{
|
|
/* -sign*(X + Y)*a == B - A (mod |n|) */
|
|
if (!BN_uadd(X, X, Y)) goto err;
|
|
/* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
|
|
* actually makes the algorithm slower */
|
|
if (!BN_usub(B, B, A)) goto err;
|
|
}
|
|
else
|
|
{
|
|
/* sign*(X + Y)*a == A - B (mod |n|) */
|
|
if (!BN_uadd(Y, Y, X)) goto err;
|
|
/* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
|
|
if (!BN_usub(A, A, B)) goto err;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
goto err;
|
|
|
|
/*
|
|
* The while loop (Euclid's algorithm) ends when
|
|
* A == gcd(a,n);
|
|
* we have
|
|
* sign*Y*a == A (mod |n|),
|
|
* where Y is non-negative.
|
|
*/
|
|
|
|
if (sign < 0)
|
|
{
|
|
if (!BN_sub(Y,n,Y)) goto err;
|
|
}
|
|
/* Now Y*a == A (mod |n|). */
|
|
|
|
if (BN_is_one(A))
|
|
{
|
|
/* Y*a == 1 (mod |n|) */
|
|
if (BN_ucmp(Y,n) < 0)
|
|
{
|
|
if (!BN_copy(R,Y)) goto err;
|
|
}
|
|
else
|
|
{
|
|
if (!(BN_mod(R, Y, n, ctx))) goto err;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
goto err;
|
|
}
|
|
ret=R;
|
|
err:
|
|
if ((ret == NULL) && (in == NULL)) BN_free(R);
|
|
BN_POOL_release(ctx,7);
|
|
return(ret);
|
|
}
|
|
|
|
static int BN_sub_word(BIGNUM *a, BN_ULONG w)
|
|
{
|
|
int i;
|
|
|
|
w &= BN_MASK2;
|
|
|
|
/* degenerate case: w is zero */
|
|
if (!w) return 1;
|
|
/* degenerate case: a is zero */
|
|
if(BN_is_zero(a))
|
|
{
|
|
i = BN_set_word(a,w);
|
|
if (i != 0)
|
|
{
|
|
DHPRINT("BN_set_negative\n");
|
|
/*BN_set_negative(a, 1); */
|
|
}
|
|
return i;
|
|
}
|
|
|
|
if ((a->top == 1) && (a->d[0] < w))
|
|
{
|
|
a->d[0]=w-a->d[0];
|
|
DHPRINT("--------\n");
|
|
return(1);
|
|
}
|
|
i=0;
|
|
for (;;)
|
|
{
|
|
if (a->d[i] >= w)
|
|
{
|
|
a->d[i]-=w;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
a->d[i]=(a->d[i]-w)&BN_MASK2;
|
|
i++;
|
|
w=1;
|
|
}
|
|
}
|
|
if ((a->d[i] == 0) && (i == (a->top-1)))
|
|
a->top--;
|
|
return(1);
|
|
}
|
|
|
|
static int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_POOL *ctx)
|
|
{
|
|
int ret = 0;
|
|
BIGNUM *Ri,*R;
|
|
BIGNUM tmod;
|
|
BN_ULONG buf[2];
|
|
|
|
if((Ri = BN_POOL_get(ctx)) == NULL) goto err;
|
|
R= &(mont->RR); /* grab RR as a temp */
|
|
if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
|
|
|
|
/*start*/
|
|
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
|
|
BN_zero(R);
|
|
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
|
|
|
|
buf[0]=mod->d[0]; /* tmod = N mod word size */
|
|
buf[1]=0;
|
|
tmod.d=buf;
|
|
tmod.top = buf[0] != 0 ? 1 : 0;
|
|
tmod.dmax=2;
|
|
/* Ri = R^-1 mod N*/
|
|
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
|
|
goto err;
|
|
if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */
|
|
if (!BN_is_zero(Ri))
|
|
{
|
|
if (!BN_sub_word(Ri,1)) goto err;
|
|
}
|
|
else /* if N mod word size == 1 */
|
|
{
|
|
if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
|
|
}
|
|
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
|
|
/* Ni = (R*Ri-1)/N,
|
|
* keep only least significant word: */
|
|
mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
|
|
/*end*/
|
|
|
|
/* setup RR for conversions */
|
|
BN_zero(&(mont->RR));
|
|
if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
|
|
if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
|
|
|
|
ret = 1;
|
|
err:
|
|
return ret;
|
|
}
|
|
|
|
static const EVP_MD sha1_md=
|
|
{
|
|
NID_sha1,
|
|
SHA_DIGEST_LENGTH,
|
|
0,
|
|
RAND_init,
|
|
RAND_update,
|
|
RAND_final,
|
|
sizeof(EVP_MD *)+sizeof(OPEN_SHA_CTX),
|
|
};
|
|
|
|
/* This call frees resources associated with the context */
|
|
static int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
|
|
{
|
|
/* Don't assume ctx->md_data was cleaned in EVP_Digest_Final,
|
|
* because sometimes only copies of the context are ever finalised.
|
|
*/
|
|
if (ctx->digest && ctx->digest->ctx_size && ctx->md_data)
|
|
{
|
|
os_free_mem(NULL, ctx->md_data);
|
|
}
|
|
memset(ctx,'\0',sizeof *ctx);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* The caller can assume that this removes any secret data from the context */
|
|
static int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size)
|
|
{
|
|
int ret;
|
|
|
|
ret=ctx->digest->final(ctx,md);
|
|
if (size != NULL)
|
|
*size=ctx->digest->md_size;
|
|
memset(ctx->md_data,0,ctx->digest->ctx_size);
|
|
return ret;
|
|
}
|
|
|
|
static void EVP_MD_CTX_init(EVP_MD_CTX *ctx)
|
|
{
|
|
memset(ctx,'\0',sizeof *ctx);
|
|
}
|
|
|
|
static int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type)
|
|
{
|
|
if (ctx->digest != type)
|
|
{
|
|
if (ctx->digest && ctx->digest->ctx_size)
|
|
os_free_mem(NULL, ctx->md_data);
|
|
ctx->digest=type;
|
|
if (type->ctx_size)
|
|
os_alloc_mem(NULL, (UCHAR **)&(ctx->md_data), type->ctx_size);
|
|
}
|
|
return ctx->digest->init(ctx);
|
|
}
|
|
|
|
static int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *data,
|
|
size_t count)
|
|
{
|
|
return ctx->digest->update(ctx,data,count);
|
|
}
|
|
|
|
static void ssleay_rand_add(const void *buf, int num, long add)
|
|
{
|
|
int i,j,k,st_idx;
|
|
long md_c[2];
|
|
unsigned char local_md[SHA_DIGEST_LENGTH];
|
|
EVP_MD_CTX m;
|
|
|
|
/*
|
|
* (Based on the rand(3) manpage)
|
|
*
|
|
* The input is chopped up into units of 20 bytes (or less for
|
|
* the last block). Each of these blocks is run through the hash
|
|
* function as follows: The data passed to the hash function
|
|
* is the current 'md', the same number of bytes from the 'state'
|
|
* (the location determined by in incremented looping index) as
|
|
* the current 'block', the new key data 'block', and 'count'
|
|
* (which is incremented after each use).
|
|
* The result of this is kept in 'md' and also xored into the
|
|
* 'state' at the same locations that were used as input into the
|
|
* hash function.
|
|
*/
|
|
|
|
st_idx=g_state_index;
|
|
|
|
/* use our own copies of the counters so that even
|
|
* if a concurrent thread seeds with exactly the
|
|
* same data and uses the same subarray there's _some_
|
|
* difference */
|
|
md_c[0] = g_md_count[0];
|
|
md_c[1] = g_md_count[1];
|
|
|
|
memcpy(local_md, g_md, sizeof g_md);
|
|
|
|
/* g_state_index <= g_state_num <= STATE_SIZE */
|
|
g_state_index += num;
|
|
if (g_state_index >= STATE_SIZE)
|
|
{
|
|
g_state_index%=STATE_SIZE;
|
|
g_state_num=STATE_SIZE;
|
|
}
|
|
else if (g_state_num < STATE_SIZE)
|
|
{
|
|
if (g_state_index > g_state_num)
|
|
g_state_num=g_state_index;
|
|
}
|
|
/* g_state_index <= g_state_num <= STATE_SIZE */
|
|
|
|
/* g_state[st_idx], ..., g_state[(st_idx + num - 1) % STATE_SIZE]
|
|
* are what we will use now, but other threads may use them
|
|
* as well */
|
|
|
|
g_md_count[1] += (num / SHA_DIGEST_LENGTH) + (num % SHA_DIGEST_LENGTH > 0);
|
|
|
|
EVP_MD_CTX_init(&m);
|
|
for (i=0; i<num; i+=SHA_DIGEST_LENGTH)
|
|
{
|
|
j=(num-i);
|
|
j=(j > SHA_DIGEST_LENGTH)?SHA_DIGEST_LENGTH:j;
|
|
|
|
EVP_DigestInit_ex(&m,&sha1_md);
|
|
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
|
|
k=(st_idx+j)-STATE_SIZE;
|
|
if (k > 0)
|
|
{
|
|
EVP_DigestUpdate(&m,&(g_state[st_idx]),j-k);
|
|
EVP_DigestUpdate(&m,&(g_state[0]),k);
|
|
}
|
|
else
|
|
EVP_DigestUpdate(&m,&(g_state[st_idx]),j);
|
|
|
|
EVP_DigestUpdate(&m,buf,j);
|
|
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
|
|
EVP_DigestFinal_ex(&m,local_md,NULL);
|
|
md_c[1]++;
|
|
|
|
buf=(const char *)buf + j;
|
|
|
|
for (k=0; k<j; k++)
|
|
{
|
|
/* Parallel threads may interfere with this,
|
|
* but always each byte of the new state is
|
|
* the XOR of some previous value of its
|
|
* and local_md (itermediate values may be lost).
|
|
* Alway using locking could hurt performance more
|
|
* than necessary given that conflicts occur only
|
|
* when the total seeding is longer than the random
|
|
* state. */
|
|
g_state[st_idx++]^=local_md[k];
|
|
if (st_idx >= STATE_SIZE)
|
|
st_idx=0;
|
|
}
|
|
}
|
|
EVP_MD_CTX_cleanup(&m);
|
|
|
|
/* Don't just copy back local_md into md -- this could mean that
|
|
* other thread's seeding remains without effect (except for
|
|
* the incremented counter). By XORing it we keep at least as
|
|
* much entropy as fits into md. */
|
|
for (k = 0; k < (int)sizeof(g_md); k++)
|
|
{
|
|
g_md[k] ^= local_md[k];
|
|
}
|
|
if (g_entropy < ENTROPY_NEEDED) /* stop counting when we have enough */
|
|
g_entropy += add;
|
|
}
|
|
|
|
static int ssleay_rand_bytes(unsigned char *buf, int num)
|
|
{
|
|
static volatile int stirred_pool = 0;
|
|
int i,j,k,st_num,st_idx;
|
|
int num_ceil;
|
|
int ok;
|
|
long md_c[2];
|
|
unsigned char local_md[SHA_DIGEST_LENGTH];
|
|
EVP_MD_CTX m;
|
|
#ifndef GETPID_IS_MEANINGLESS
|
|
pid_t curr_pid = getpid();
|
|
#endif
|
|
int do_stir_pool = 0;
|
|
|
|
if (num <= 0)
|
|
return 1;
|
|
|
|
EVP_MD_CTX_init(&m);
|
|
/* round upwards to multiple of SHA_DIGEST_LENGTH/2 */
|
|
num_ceil = (1 + (num-1)/(SHA_DIGEST_LENGTH/2)) * (SHA_DIGEST_LENGTH/2);
|
|
|
|
/*
|
|
* (Based on the rand(3) manpage:)
|
|
*
|
|
* For each group of 10 bytes (or less), we do the following:
|
|
*
|
|
* Input into the hash function the local 'md' (which is RAND_initialized from
|
|
* the global 'md' before any bytes are generated), the bytes that are to
|
|
* be overwritten by the random bytes, and bytes from the 'state'
|
|
* (incrementing looping index). From this digest output (which is kept
|
|
* in 'md'), the top (up to) 10 bytes are returned to the caller and the
|
|
* bottom 10 bytes are xored into the 'state'.
|
|
*
|
|
* Finally, after we have finished 'num' random bytes for the
|
|
* caller, 'count' (which is incremented) and the local and global 'md'
|
|
* are fed into the hash function and the results are kept in the
|
|
* global 'md'.
|
|
*/
|
|
|
|
if (!RAND_initialized)
|
|
{
|
|
RAND_poll();
|
|
RAND_initialized = 1;
|
|
}
|
|
|
|
if (!stirred_pool)
|
|
do_stir_pool = 1;
|
|
|
|
ok = (g_entropy >= ENTROPY_NEEDED);
|
|
if (!ok)
|
|
{
|
|
/* If the PRNG state is not yet unpredictable, then seeing
|
|
* the PRNG output may help attackers to determine the new
|
|
* state; thus we have to decrease the entropy estimate.
|
|
* Once we've had enough initial seeding we don't bother to
|
|
* adjust the entropy count, though, because we're not ambitious
|
|
* to provide *information-theoretic* randomness.
|
|
*
|
|
* NOTE: This approach fails if the program forks before
|
|
* we have enough entropy. Entropy should be collected
|
|
* in a separate input pool and be transferred to the
|
|
* output pool only when the entropy limit has been reached.
|
|
*/
|
|
g_entropy -= num;
|
|
if (g_entropy < 0)
|
|
g_entropy = 0;
|
|
}
|
|
|
|
if (do_stir_pool)
|
|
{
|
|
/* In the output function only half of 'md' remains secret,
|
|
* so we better make sure that the required entropy gets
|
|
* 'evenly distributed' through 'state', our randomness pool.
|
|
* The input function (ssleay_rand_add) chains all of 'md',
|
|
* which makes it more suitable for this purpose.
|
|
*/
|
|
|
|
int n = STATE_SIZE; /* so that the complete pool gets accessed */
|
|
while (n > 0)
|
|
{
|
|
/* Note that the seed does not matter, it's just that
|
|
* ssleay_rand_add expects to have something to hash. */
|
|
/* at least SHA_DIGEST_LENGTH "...................." */
|
|
ssleay_rand_add("....................", SHA_DIGEST_LENGTH, 0);
|
|
n -= SHA_DIGEST_LENGTH;
|
|
}
|
|
if (ok)
|
|
stirred_pool = 1;
|
|
}
|
|
|
|
st_idx=g_state_index;
|
|
st_num=g_state_num;
|
|
md_c[0] = g_md_count[0];
|
|
md_c[1] = g_md_count[1];
|
|
memcpy(local_md, g_md, sizeof g_md);
|
|
|
|
g_state_index+=num_ceil;
|
|
if (g_state_index > g_state_num)
|
|
g_state_index %= g_state_num;
|
|
|
|
/* g_state[st_idx], ..., g_state[(st_idx + num_ceil - 1) % st_num]
|
|
* are now ours (but other threads may use them too) */
|
|
|
|
g_md_count[0] += 1;
|
|
|
|
while (num > 0)
|
|
{
|
|
/* num_ceil -= SHA_DIGEST_LENGTH/2 */
|
|
j=(num >= SHA_DIGEST_LENGTH/2)?SHA_DIGEST_LENGTH/2:num;
|
|
num-=j;
|
|
EVP_DigestInit_ex(&m,&sha1_md);
|
|
#ifndef GETPID_IS_MEANINGLESS
|
|
if (curr_pid) /* just in the first iteration to save time */
|
|
{
|
|
EVP_DigestUpdate(&m,(unsigned char*)&curr_pid,sizeof curr_pid);
|
|
curr_pid = 0;
|
|
}
|
|
#endif
|
|
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
|
|
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
|
|
#ifndef PURIFY
|
|
EVP_DigestUpdate(&m,buf,j); /* purify complains */
|
|
#endif
|
|
k=(st_idx+SHA_DIGEST_LENGTH/2)-st_num;
|
|
if (k > 0)
|
|
{
|
|
EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2-k);
|
|
EVP_DigestUpdate(&m,&(g_state[0]),k);
|
|
}
|
|
else
|
|
EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2);
|
|
EVP_DigestFinal_ex(&m,local_md,NULL);
|
|
|
|
for (i=0; i<SHA_DIGEST_LENGTH/2; i++)
|
|
{
|
|
g_state[st_idx++]^=local_md[i]; /* may compete with other threads */
|
|
if (st_idx >= st_num)
|
|
st_idx=0;
|
|
if (i < j)
|
|
*(buf++)=local_md[i+SHA_DIGEST_LENGTH/2];
|
|
}
|
|
}
|
|
EVP_DigestInit_ex(&m,&sha1_md);
|
|
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
|
|
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
|
|
EVP_DigestUpdate(&m,g_md,SHA_DIGEST_LENGTH);
|
|
EVP_DigestFinal_ex(&m,g_md,NULL);
|
|
EVP_MD_CTX_cleanup(&m);
|
|
if (ok)
|
|
return(1);
|
|
else
|
|
{
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
static int RAND_poll(void)
|
|
{
|
|
unsigned long l=100;
|
|
unsigned char tmpbuf[ENTROPY_NEEDED];
|
|
int n = 0;
|
|
|
|
n=ENTROPY_NEEDED;
|
|
|
|
if (n > 0)
|
|
{
|
|
ssleay_rand_add(tmpbuf,sizeof tmpbuf,(long)n);
|
|
}
|
|
|
|
/* put in some default random data, we need more than just this */
|
|
l=100;/*curr_pid; */
|
|
ssleay_rand_add(&l,sizeof(l),0);
|
|
l=200;/*getuid(); */
|
|
ssleay_rand_add(&l,sizeof(l),0);
|
|
|
|
#ifdef USERSPACE
|
|
l=time(NULL);
|
|
#else
|
|
NdisGetSystemUpTime(&l);
|
|
#endif /*USERSPACE */
|
|
ssleay_rand_add(&l,sizeof(l),0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
|
|
{
|
|
unsigned char *buf=NULL;
|
|
int ret=0,bit,bytes,mask;
|
|
time_t tim;
|
|
|
|
if (bits == 0)
|
|
{
|
|
BN_zero(rnd);
|
|
return 1;
|
|
}
|
|
|
|
bytes=(bits+7)/8;
|
|
bit=(bits-1)%8;
|
|
mask=0xff<<(bit+1);
|
|
|
|
os_alloc_mem(NULL, (UCHAR **)&buf, bytes);
|
|
if (buf == NULL)
|
|
{
|
|
goto err;
|
|
}
|
|
|
|
/* make a random number and set the top and bottom bits */
|
|
#ifdef USERSPACE
|
|
time(&tim);
|
|
#else
|
|
NdisGetSystemUpTime((ULONG *)(&tim));
|
|
#endif /*USERSPACE */
|
|
ssleay_rand_add(&tim,sizeof(tim),0);
|
|
|
|
if (pseudorand)
|
|
{
|
|
if (ssleay_rand_bytes(buf, bytes) == -1)
|
|
goto err;
|
|
}
|
|
else
|
|
{
|
|
if (ssleay_rand_bytes(buf, bytes) <= 0)
|
|
goto err;
|
|
}
|
|
|
|
if (pseudorand == 2)
|
|
{
|
|
/* generate patterns that are more likely to trigger BN
|
|
library bugs */
|
|
int i;
|
|
unsigned char c;
|
|
|
|
for (i = 0; i < bytes; i++)
|
|
{
|
|
ssleay_rand_bytes(&c, 1);
|
|
if (c >= 128 && i > 0)
|
|
buf[i] = buf[i-1];
|
|
else if (c < 42)
|
|
buf[i] = 0;
|
|
else if (c < 84)
|
|
buf[i] = 255;
|
|
}
|
|
}
|
|
|
|
if (top != -1)
|
|
{
|
|
if (top)
|
|
{
|
|
if (bit == 0)
|
|
{
|
|
buf[0]=1;
|
|
buf[1]|=0x80;
|
|
}
|
|
else
|
|
{
|
|
buf[0]|=(3<<(bit-1));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
buf[0]|=(1<<bit);
|
|
}
|
|
}
|
|
buf[0] &= ~mask;
|
|
if (bottom) /* set bottom bit if requested */
|
|
buf[bytes-1]|=1;
|
|
if (!BN_bin2bn(buf,bytes,rnd)) goto err;
|
|
ret=1;
|
|
err:
|
|
if (buf != NULL)
|
|
{
|
|
os_free_mem(NULL, buf);
|
|
}
|
|
return(ret);
|
|
}
|
|
|
|
static BN_MONT_CTX *BN_MONT_CTX_new(void)
|
|
{
|
|
BN_MONT_CTX *ret;
|
|
|
|
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BN_MONT_CTX));
|
|
if (ret == NULL)
|
|
return(NULL);
|
|
|
|
ret->ri=0;
|
|
BN_init(&(ret->RR));
|
|
BN_init(&(ret->N));
|
|
ret->flags=BN_FLG_MALLOCED;
|
|
return(ret);
|
|
}
|
|
|
|
static void BN_CTX_free(BN_POOL *ctx)
|
|
{
|
|
if (ctx == NULL)
|
|
return;
|
|
BN_POOL_finish(ctx);
|
|
os_free_mem(NULL, ctx);
|
|
}
|
|
|
|
static void BN_MONT_CTX_free(BN_MONT_CTX *mont)
|
|
{
|
|
if(mont == NULL)
|
|
return;
|
|
|
|
BN_free(&(mont->RR));
|
|
BN_free(&(mont->N));
|
|
if (mont->flags & BN_FLG_MALLOCED)
|
|
os_free_mem(NULL, mont);
|
|
}
|
|
|
|
#define BN_MOD_MUL_WORD(r, w, m) (BN_mul_word(r, (w)) && ((BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
|
|
static int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
|
|
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
|
|
{
|
|
BN_MONT_CTX *mont = NULL;
|
|
int b, bits, ret=0;
|
|
int r_is_one;
|
|
BN_ULONG w, next_w;
|
|
BIGNUM *d, *r, *t;
|
|
BIGNUM *swap_tmp;
|
|
|
|
if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
|
|
{
|
|
/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
|
|
return -1;
|
|
}
|
|
|
|
if (!BN_is_odd(m))
|
|
{
|
|
return(0);
|
|
}
|
|
if (m->top == 1)
|
|
a %= m->d[0]; /* make sure that 'a' is reduced */
|
|
|
|
bits = BN_num_bits(p);
|
|
if (bits == 0)
|
|
{
|
|
ret = BN_one(rr);
|
|
return ret;
|
|
}
|
|
if (a == 0)
|
|
{
|
|
BN_zero(rr);
|
|
ret = 1;
|
|
return ret;
|
|
}
|
|
|
|
d = BN_POOL_get(ctx);
|
|
r = BN_POOL_get(ctx);
|
|
t = BN_POOL_get(ctx);
|
|
if (d == NULL || r == NULL || t == NULL) goto err;
|
|
|
|
if (in_mont != NULL)
|
|
mont=in_mont;
|
|
else
|
|
{
|
|
if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
|
|
if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
|
|
}
|
|
|
|
r_is_one = 1; /* except for Montgomery factor */
|
|
|
|
/* bits-1 >= 0 */
|
|
|
|
/* The result is accumulated in the product r*w. */
|
|
w = a; /* bit 'bits-1' of 'p' is always set */
|
|
|
|
for (b = bits-2; b >= 0; b--)
|
|
{
|
|
/* First, square r*w. */
|
|
next_w = w*w;
|
|
if ((next_w/w) != w) /* overflow */
|
|
{
|
|
if (r_is_one)
|
|
{
|
|
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
|
|
r_is_one = 0;
|
|
}
|
|
else
|
|
{
|
|
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
|
|
}
|
|
next_w = 1;
|
|
}
|
|
w = next_w;
|
|
if (!r_is_one)
|
|
{
|
|
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
|
|
}
|
|
|
|
/* Second, multiply r*w by 'a' if exponent bit is set. */
|
|
if (BN_is_bit_set(p, b))
|
|
{
|
|
next_w = w*a;
|
|
if ((next_w/a) != w) /* overflow */
|
|
{
|
|
if (r_is_one)
|
|
{
|
|
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
|
|
r_is_one = 0;
|
|
}
|
|
else
|
|
{
|
|
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
|
|
}
|
|
next_w = a;
|
|
}
|
|
w = next_w;
|
|
}
|
|
}
|
|
|
|
/* Finally, set r:=r*w. */
|
|
if (w != 1)
|
|
{
|
|
if (r_is_one)
|
|
{
|
|
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
|
|
r_is_one = 0;
|
|
}
|
|
else
|
|
{
|
|
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
|
|
}
|
|
}
|
|
|
|
if (r_is_one) /* can happen only if a == 1*/
|
|
{
|
|
if (!BN_one(rr)) goto err;
|
|
}
|
|
else
|
|
{
|
|
if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
|
|
}
|
|
ret = 1;
|
|
err:
|
|
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
|
|
BN_POOL_release(ctx,3);
|
|
return(ret);
|
|
}
|
|
|
|
/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
|
|
* so that accessing any of these table values shows the same access pattern as far
|
|
* as cache lines are concerned. The following functions are used to transfer a BIGNUM
|
|
* from/to that table. */
|
|
static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
|
|
{
|
|
size_t i, j;
|
|
|
|
if (bn_wexpand(b, top) == NULL)
|
|
return 0;
|
|
while (b->top < top)
|
|
{
|
|
b->d[b->top++] = 0;
|
|
}
|
|
|
|
for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
|
|
{
|
|
buf[j] = ((unsigned char*)b->d)[i];
|
|
}
|
|
|
|
bn_correct_top(b);
|
|
return 1;
|
|
}
|
|
|
|
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
|
|
{
|
|
size_t i, j;
|
|
|
|
if (bn_wexpand(b, top) == NULL)
|
|
return 0;
|
|
|
|
for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
|
|
{
|
|
((unsigned char*)b->d)[i] = buf[j];
|
|
}
|
|
|
|
b->top = top;
|
|
bn_correct_top(b);
|
|
return 1;
|
|
}
|
|
|
|
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
|
|
* precomputation memory layout to limit data-dependency to a minimum
|
|
* to protect secret exponents (cf. the hyper-threading timing attacks
|
|
* pointed out by Colin Percival,
|
|
* http://www.daemonology.net/hyperthreading-considered-harmful
|
|
*/
|
|
static int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
|
|
{
|
|
int i,bits,ret=0,idx,window,wvalue;
|
|
int top;
|
|
BIGNUM *r;
|
|
const BIGNUM *aa;
|
|
BN_MONT_CTX *mont=NULL;
|
|
|
|
int numPowers;
|
|
unsigned char *powerbufFree=NULL;
|
|
int powerbufLen = 0;
|
|
unsigned char *powerbuf=NULL;
|
|
BIGNUM *computeTemp=NULL, *am=NULL;
|
|
|
|
top = m->top;
|
|
|
|
if (!(m->d[0] & 1))
|
|
{
|
|
return(0);
|
|
}
|
|
bits=BN_num_bits(p);
|
|
if (bits == 0)
|
|
{
|
|
ret = BN_one(rr);
|
|
return ret;
|
|
}
|
|
|
|
r = BN_POOL_get(ctx);
|
|
if (r == NULL) goto err;
|
|
|
|
/* Allocate a montgomery context if it was not supplied by the caller.
|
|
* If this is not done, things will break in the montgomery part.
|
|
*/
|
|
if (in_mont != NULL)
|
|
mont=in_mont;
|
|
else
|
|
{
|
|
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
|
|
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
|
|
}
|
|
|
|
/* Get the window size to use with size of p. */
|
|
window = BN_window_bits_for_ctime_exponent_size(bits);
|
|
|
|
/* Allocate a buffer large enough to hold all of the pre-computed
|
|
* powers of a.
|
|
*/
|
|
numPowers = 1 << window;
|
|
powerbufLen = sizeof(m->d[0])*top*numPowers;
|
|
os_alloc_mem(NULL, (UCHAR **)&powerbufFree, powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
|
|
if (powerbufFree == NULL)
|
|
goto err;
|
|
|
|
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
|
|
memset(powerbuf, 0, powerbufLen);
|
|
|
|
/* Initialize the intermediate result. Do this early to save double conversion,
|
|
* once each for a^0 and intermediate result.
|
|
*/
|
|
if (!BN_mod_mul_montgomery((r),(BN_value_one()),&((mont)->RR),(mont),(ctx))) goto err;
|
|
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
|
|
|
|
/* Initialize computeTemp as a^1 with montgomery precalcs */
|
|
computeTemp = BN_POOL_get(ctx);
|
|
am = BN_POOL_get(ctx);
|
|
if (computeTemp==NULL || am==NULL) goto err;
|
|
|
|
if (BN_ucmp(a,m) >= 0)
|
|
{
|
|
if (!BN_mod(am,a,m,ctx))
|
|
goto err;
|
|
aa= am;
|
|
}
|
|
else
|
|
aa=a;
|
|
if (!BN_mod_mul_montgomery((am),(aa),&((mont)->RR),(mont),(ctx))) goto err;
|
|
if (!BN_copy(computeTemp, am)) goto err;
|
|
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
|
|
|
|
/* If the window size is greater than 1, then calculate
|
|
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
|
|
* (even powers could instead be computed as (a^(i/2))^2
|
|
* to use the slight performance advantage of sqr over mul).
|
|
*/
|
|
if (window > 1)
|
|
{
|
|
for (i=2; i<numPowers; i++)
|
|
{
|
|
/* Calculate a^i = a^(i-1) * a */
|
|
if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx)) goto err;
|
|
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
|
|
}
|
|
}
|
|
|
|
/* Adjust the number of bits up to a multiple of the window size.
|
|
* If the exponent length is not a multiple of the window size, then
|
|
* this pads the most significant bits with zeros to normalize the
|
|
* scanning loop to there's no special cases.
|
|
*
|
|
* * NOTE: Making the window size a power of two less than the native
|
|
* * word size ensures that the padded bits won't go past the last
|
|
* * word in the internal BIGNUM structure. Going past the end will
|
|
* * still produce the correct result, but causes a different branch
|
|
* * to be taken in the BN_is_bit_set function.
|
|
*/
|
|
bits = ((bits+window-1)/window)*window;
|
|
idx=bits-1; /* The top bit of the window */
|
|
|
|
/* Scan the exponent one window at a time starting from the most
|
|
* significant bits.
|
|
*/
|
|
while (idx >= 0)
|
|
{
|
|
wvalue=0; /* The 'value' of the window */
|
|
|
|
/* Scan the window, squaring the result as we go */
|
|
for (i=0; i<window; i++,idx--)
|
|
{
|
|
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
|
|
wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
|
|
}
|
|
|
|
/* Fetch the appropriate pre-computed value from the pre-buf */
|
|
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
|
|
|
|
/* Multiply the result into the intermediate result */
|
|
if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
|
|
}
|
|
|
|
/* Convert the final result from montgomery to standard format */
|
|
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
|
|
ret=1;
|
|
err:
|
|
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
|
|
if (powerbuf!=NULL)
|
|
{
|
|
os_free_mem(NULL, powerbufFree);
|
|
}
|
|
if (am!=NULL) BN_clear(am);
|
|
if (computeTemp!=NULL) BN_clear(computeTemp);
|
|
BN_POOL_release(ctx,3);
|
|
return(ret);
|
|
}
|
|
|
|
static int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
|
|
{
|
|
int release=0;
|
|
int i,j,bits,ret=0,wstart,wend,window,wvalue;
|
|
int start=1;
|
|
BIGNUM *d,*r;
|
|
const BIGNUM *aa;
|
|
/* Table of variables obtained from 'ctx' */
|
|
BIGNUM *val[EXP_TABLE_SIZE];
|
|
BN_MONT_CTX *mont=NULL;
|
|
|
|
if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
|
|
{
|
|
return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
|
|
}
|
|
|
|
if (!BN_is_odd(m))
|
|
{
|
|
return(0);
|
|
}
|
|
bits=BN_num_bits(p);
|
|
if (bits == 0)
|
|
{
|
|
ret = BN_one(rr);
|
|
return ret;
|
|
}
|
|
d = BN_POOL_get(ctx);
|
|
r = BN_POOL_get(ctx);
|
|
val[0] = BN_POOL_get(ctx);
|
|
release=3;
|
|
if (!d || !r || !val[0]) goto err;
|
|
|
|
/* If this is not done, things will break in the montgomery
|
|
* part */
|
|
|
|
if (in_mont != NULL)
|
|
mont=in_mont;
|
|
else
|
|
{
|
|
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
|
|
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
|
|
}
|
|
|
|
if (BN_ucmp(a,m) >= 0)
|
|
{
|
|
if (!(BN_mod(val[0], a, m, ctx))) goto err;
|
|
aa= val[0];
|
|
}
|
|
else
|
|
aa=a;
|
|
if (BN_is_zero(aa))
|
|
{
|
|
BN_zero(rr);
|
|
ret = 1;
|
|
goto err;
|
|
}
|
|
if (!BN_mod_mul_montgomery((val[0]),(aa),&((mont)->RR),(mont),(ctx))) goto err; /* 1 */
|
|
|
|
window = BN_window_bits_for_exponent_size(bits);
|
|
if (window > 1)
|
|
{
|
|
if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
|
|
j=1<<(window-1);
|
|
for (i=1; i<j; i++)
|
|
{
|
|
if(((val[i] = BN_POOL_get(ctx)) == NULL) ||
|
|
!BN_mod_mul_montgomery(val[i],val[i-1],
|
|
d,mont,ctx))
|
|
goto err;
|
|
release++;
|
|
}
|
|
}
|
|
|
|
start=1; /* This is used to avoid multiplication etc
|
|
* when there is only the value '1' in the
|
|
* buffer. */
|
|
wvalue=0; /* The 'value' of the window */
|
|
wstart=bits-1; /* The top bit of the window */
|
|
wend=0; /* The bottom bit of the window */
|
|
|
|
if (!BN_mod_mul_montgomery((r),(BN_value_one()),&((mont)->RR),(mont),(ctx))) goto err;
|
|
for (;;)
|
|
{
|
|
if (BN_is_bit_set(p,wstart) == 0)
|
|
{
|
|
if (!start)
|
|
{
|
|
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
|
|
goto err;
|
|
}
|
|
if (wstart == 0) break;
|
|
wstart--;
|
|
continue;
|
|
}
|
|
/* We now have wstart on a 'set' bit, we now need to work out
|
|
* how bit a window to do. To do this we need to scan
|
|
* forward until the last set bit before the end of the
|
|
* window */
|
|
j=wstart;
|
|
wvalue=1;
|
|
wend=0;
|
|
for (i=1; i<window; i++)
|
|
{
|
|
if (wstart-i < 0) break;
|
|
if (BN_is_bit_set(p,wstart-i))
|
|
{
|
|
wvalue<<=(i-wend);
|
|
wvalue|=1;
|
|
wend=i;
|
|
}
|
|
}
|
|
|
|
/* wend is the size of the current window */
|
|
j=wend+1;
|
|
/* add the 'bytes above' */
|
|
if (!start)
|
|
for (i=0; i<j; i++)
|
|
{
|
|
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
|
|
goto err;
|
|
}
|
|
|
|
/* wvalue will be an odd number < 2^window */
|
|
if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
|
|
goto err;
|
|
|
|
/* move the 'window' down further */
|
|
wstart-=wend+1;
|
|
wvalue=0;
|
|
start=0;
|
|
if (wstart < 0) break;
|
|
}
|
|
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
|
|
ret=1;
|
|
err:
|
|
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
|
|
BN_POOL_release(ctx,release);
|
|
return(ret);
|
|
}
|
|
|
|
/*KERNEL MODULE TRACE*/
|
|
static unsigned char DH_P_VALUE[DH_KEY_LEN] =
|
|
{
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
|
|
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
|
|
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
|
|
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
|
|
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
|
|
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
|
|
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
|
|
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
|
|
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
|
|
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
|
|
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
|
|
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
|
|
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
|
|
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
|
|
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
|
|
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
|
|
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
|
|
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
|
|
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
|
|
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
|
|
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
|
|
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
|
|
};
|
|
|
|
static DH *DH_new_method(void)
|
|
{
|
|
DH *ret;
|
|
|
|
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(DH));
|
|
if (ret == NULL)
|
|
{
|
|
return(NULL);
|
|
}
|
|
|
|
ret->p=NULL;
|
|
ret->g=NULL;
|
|
ret->pub_key=NULL;
|
|
ret->priv_key=NULL;
|
|
ret->flags=0;
|
|
return(ret);
|
|
}
|
|
|
|
static int dh_bn_mod_exp(const DH *dh, BIGNUM *r,
|
|
const BIGNUM *a, const BIGNUM *p,
|
|
const BIGNUM *m, BN_POOL *ctx,
|
|
BN_MONT_CTX *m_ctx)
|
|
{
|
|
/* If a is only one word long and constant time is false, use the faster
|
|
* exponenentiation function.
|
|
*/
|
|
if (a->top == 1 && ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) != 0))
|
|
{
|
|
BN_ULONG A = a->d[0];
|
|
return BN_mod_exp_mont_word(r,A,p,m,ctx,m_ctx);
|
|
}
|
|
else
|
|
return BN_mod_exp_mont(r,a,p,m,ctx,m_ctx);
|
|
}
|
|
|
|
static int generate_key(DH *dh)
|
|
{
|
|
int ok=0;
|
|
int generate_new_key=0;
|
|
unsigned lvar;
|
|
BN_POOL *ctx;
|
|
BN_MONT_CTX *mont=NULL;
|
|
BIGNUM *pub_key=NULL,*priv_key=NULL;
|
|
|
|
ctx = BN_CTX_new();
|
|
if (ctx == NULL) goto err;
|
|
|
|
if (dh->priv_key == NULL)
|
|
{
|
|
priv_key=BN_new();
|
|
if (priv_key == NULL) goto err;
|
|
generate_new_key=1;
|
|
}
|
|
else
|
|
priv_key=dh->priv_key;
|
|
|
|
if (dh->pub_key == NULL)
|
|
{
|
|
pub_key=BN_new();
|
|
if (pub_key == NULL) goto err;
|
|
}
|
|
else
|
|
pub_key=dh->pub_key;
|
|
|
|
if (generate_new_key)
|
|
{
|
|
lvar = BN_num_bits(dh->p)-1; /* secret exponent length */
|
|
if (!bnrand(0, priv_key, lvar, 0, 0)) goto err;
|
|
}
|
|
|
|
{
|
|
BIGNUM local_prk;
|
|
BIGNUM *prk;
|
|
|
|
if ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) == 0)
|
|
{
|
|
BN_init(&local_prk);
|
|
prk = &local_prk;
|
|
BN_with_flags(prk, priv_key, BN_FLG_EXP_CONSTTIME);
|
|
}
|
|
else
|
|
prk = priv_key;
|
|
|
|
if (!dh_bn_mod_exp(dh, pub_key, dh->g, prk, dh->p, ctx, mont)) goto err;
|
|
}
|
|
|
|
dh->pub_key=pub_key;
|
|
dh->priv_key=priv_key;
|
|
ok=1;
|
|
err:
|
|
if ((pub_key != NULL) && (dh->pub_key == NULL)) BN_free(pub_key);
|
|
if ((priv_key != NULL) && (dh->priv_key == NULL)) BN_free(priv_key);
|
|
BN_CTX_free(ctx);
|
|
return(ok);
|
|
}
|
|
|
|
static void DH_free(DH *r)
|
|
{
|
|
if(r == NULL) return;
|
|
|
|
if (r->p != NULL) BN_clear_free(r->p);
|
|
if (r->g != NULL) BN_clear_free(r->g);
|
|
if (r->pub_key != NULL) BN_clear_free(r->pub_key);
|
|
if (r->priv_key != NULL) BN_clear_free(r->priv_key);
|
|
os_free_mem(NULL, r);
|
|
}
|
|
|
|
static int compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh)
|
|
{
|
|
BN_POOL *ctx=NULL;
|
|
BN_MONT_CTX *mont=NULL;
|
|
BIGNUM *tmp;
|
|
int ret= -1;
|
|
|
|
if (BN_num_bits(dh->p) > OPENSSL_DH_MAX_MODULUS_BITS) goto err;
|
|
|
|
ctx = BN_CTX_new();
|
|
if (ctx == NULL) goto err;
|
|
tmp = BN_POOL_get(ctx);
|
|
|
|
if (dh->priv_key == NULL) goto err;
|
|
|
|
if (!dh_bn_mod_exp(dh, tmp, pub_key, dh->priv_key,dh->p,ctx,mont)) goto err;
|
|
|
|
ret=BN_bn2bin(tmp,key);
|
|
err:
|
|
BN_CTX_free(ctx);
|
|
return(ret);
|
|
}
|
|
|
|
static DH *DHSecret=NULL;
|
|
static DH *DHSecretR=NULL;
|
|
|
|
void GenerateDHPublicKey(unsigned char *ran_buf, int ran_len,
|
|
unsigned char *dhkey, int *dhkey_len)
|
|
{
|
|
unsigned int DH_G_VALUE = 2;
|
|
unsigned int g = htonl(DH_G_VALUE);
|
|
|
|
*dhkey_len = 0;
|
|
if(DHSecret == NULL)
|
|
{
|
|
DHSecret = DH_new_method();
|
|
if(NULL == DHSecret) goto err;
|
|
DHSecret->p = BN_new();
|
|
DHSecret->g = BN_new();
|
|
}
|
|
else if((DHSecret != NULL) && (DHSecretR == NULL))
|
|
{
|
|
DH_free(DHSecret);
|
|
DHSecret = NULL;
|
|
DHSecret = DH_new_method();
|
|
if(NULL == DHSecret) goto err;
|
|
DHSecret->p = BN_new();
|
|
DHSecret->g = BN_new();
|
|
}
|
|
else if((DHSecret != NULL) && (DHSecretR != NULL))
|
|
{
|
|
DHPRINT("reuse DHSecret\n");
|
|
}
|
|
|
|
BN_bin2bn(DH_P_VALUE, DH_KEY_LEN, DHSecret->p);
|
|
BN_bin2bn((const unsigned char *)&g, 4, DHSecret->g);
|
|
|
|
/*
|
|
if (!DH_check(DHSecret, &i)) goto err;
|
|
if (i & DH_CHECK_P_NOT_PRIME)
|
|
DHPRINT( "p value is not prime\n");
|
|
if (i & DH_CHECK_P_NOT_SAFE_PRIME)
|
|
DHPRINT( "p value is not a safe prime\n");
|
|
if (i & DH_UNABLE_TO_CHECK_GENERATOR)
|
|
DHPRINT( "unable to check the generator value\n");
|
|
if (i & DH_NOT_SUITABLE_GENERATOR)
|
|
DHPRINT( "the g value is not a generator\n");
|
|
*/
|
|
|
|
/*DHSecret->flags &= ~DH_FLAG_NO_EXP_CONSTTIME; */
|
|
DHSecret->flags |= DH_FLAG_NO_EXP_CONSTTIME;
|
|
if (!generate_key(DHSecret)) goto err;
|
|
BN_bn2bin(DHSecret->pub_key,dhkey);
|
|
*dhkey_len = BN_num_bytes(DHSecret->pub_key);
|
|
DHPRINT("GenerateDHPublicKey Success dhkey_len%d\n",*dhkey_len);
|
|
/* DHSecret will use and free on GenerateDHSecreteKey. NO free it */
|
|
return;
|
|
err:
|
|
DH_free(DHSecret);
|
|
DHSecret = NULL;
|
|
DHPRINT("GenerateDHPublicKey EXIT\n");
|
|
return;
|
|
}
|
|
|
|
/******* KDK generation ****** */
|
|
/*generate the DH shared secret */
|
|
void GenerateDHSecreteKey(unsigned char *ran_buf, int ran_len,
|
|
unsigned char *peer_dhkey, int peer_dhkey_len,
|
|
unsigned char *secrete_dhkey, int *secrete_dhkey_len)
|
|
{
|
|
BIGNUM DH_PubKey_Peer;
|
|
|
|
*secrete_dhkey_len = 0;
|
|
if(NULL == secrete_dhkey) goto err;
|
|
if(NULL == DHSecret) goto err;
|
|
|
|
DH_PubKey_Peer.flags=BN_FLG_MALLOCED;
|
|
DH_PubKey_Peer.top=0;
|
|
DH_PubKey_Peer.dmax=0;
|
|
DH_PubKey_Peer.d=NULL;
|
|
|
|
BN_bin2bn(peer_dhkey, peer_dhkey_len, &DH_PubKey_Peer);
|
|
*secrete_dhkey_len = compute_key(secrete_dhkey,&DH_PubKey_Peer,DHSecret);
|
|
if (DH_PubKey_Peer.d != NULL)
|
|
os_free_mem(NULL, DH_PubKey_Peer.d);
|
|
DHPRINT("GenerateDHSecreteKey Success\n");
|
|
err:
|
|
DH_free(DHSecret);
|
|
DHSecret = NULL;
|
|
DHPRINT("GenerateDHSecreteKey EXIT\n");
|
|
return;
|
|
}
|
|
|
|
void DH_freeall(void)
|
|
{
|
|
DH_free(DHSecret);
|
|
DHSecret = NULL;
|
|
return;
|
|
}
|
|
|
|
#else /* CRYPT_GPL_ALGORITHM */
|
|
|
|
/*
|
|
========================================================================
|
|
Routine Description:
|
|
Diffie-Hellman public key generation
|
|
|
|
Arguments:
|
|
GValue Array in UINT8
|
|
GValueLength The length of G in bytes
|
|
PValue Array in UINT8
|
|
PValueLength The length of P in bytes
|
|
PrivateKey Private key
|
|
PrivateKeyLength The length of Private key in bytes
|
|
|
|
Return Value:
|
|
PublicKey Public key
|
|
PublicKeyLength The length of public key in bytes
|
|
|
|
Note:
|
|
Reference to RFC2631
|
|
PublicKey = G^PrivateKey (mod P)
|
|
========================================================================
|
|
*/
|
|
void DH_PublicKey_Generate (
|
|
IN UINT8 GValue[],
|
|
IN UINT GValueLength,
|
|
IN UINT8 PValue[],
|
|
IN UINT PValueLength,
|
|
IN UINT8 PrivateKey[],
|
|
IN UINT PrivateKeyLength,
|
|
OUT UINT8 PublicKey[],
|
|
INOUT UINT *PublicKeyLength)
|
|
{
|
|
PBIG_INTEGER pBI_G = NULL;
|
|
PBIG_INTEGER pBI_P = NULL;
|
|
PBIG_INTEGER pBI_PrivateKey = NULL;
|
|
PBIG_INTEGER pBI_PublicKey = NULL;
|
|
|
|
/*
|
|
* 1. Check the input parameters
|
|
* - GValueLength, PValueLength and PrivateLength must be large than zero
|
|
* - PublicKeyLength must be large or equal than PValueLength
|
|
* - PValue must be odd
|
|
*
|
|
* - PValue must be prime number (no implement)
|
|
* - GValue must be greater than 0 but less than the PValue (no implement)
|
|
*/
|
|
if (GValueLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: G length is (%d)\n", GValueLength));
|
|
return;
|
|
} /* End of if */
|
|
if (PValueLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P length is (%d)\n", PValueLength));
|
|
return;
|
|
} /* End of if */
|
|
if (PrivateKeyLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: private key length is (%d)\n", PrivateKeyLength));
|
|
return;
|
|
} /* End of if */
|
|
if (*PublicKeyLength < PValueLength) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: public key length(%d) must be large or equal than P length(%d)\n",
|
|
*PublicKeyLength, PValueLength));
|
|
return;
|
|
} /* End of if */
|
|
if (!(PValue[PValueLength - 1] & 0x1)) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P value must be odd\n"));
|
|
return;
|
|
} /* End of if */
|
|
|
|
/*
|
|
* 2. Transfer parameters to BigInteger structure
|
|
*/
|
|
BigInteger_Init(&pBI_G);
|
|
BigInteger_Init(&pBI_P);
|
|
BigInteger_Init(&pBI_PrivateKey);
|
|
BigInteger_Init(&pBI_PublicKey);
|
|
BigInteger_Bin2BI(GValue, GValueLength, &pBI_G);
|
|
BigInteger_Bin2BI(PValue, PValueLength, &pBI_P);
|
|
BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey);
|
|
|
|
/*
|
|
* 3. Calculate PublicKey = G^PrivateKey (mod P)
|
|
* - BigInteger Operation
|
|
* - Montgomery reduction
|
|
*/
|
|
BigInteger_Montgomery_ExpMod(pBI_G, pBI_PrivateKey, pBI_P, &pBI_PublicKey);
|
|
|
|
/*
|
|
* 4. Transfer BigInteger structure to char array
|
|
*/
|
|
BigInteger_BI2Bin(pBI_PublicKey, PublicKey, PublicKeyLength);
|
|
|
|
BigInteger_Free(&pBI_G);
|
|
BigInteger_Free(&pBI_P);
|
|
BigInteger_Free(&pBI_PrivateKey);
|
|
BigInteger_Free(&pBI_PublicKey);
|
|
} /* End of DH_PublicKey_Generate */
|
|
|
|
|
|
/*
|
|
========================================================================
|
|
Routine Description:
|
|
Diffie-Hellman secret key generation
|
|
|
|
Arguments:
|
|
PublicKey Public key
|
|
PublicKeyLength The length of Public key in bytes
|
|
PValue Array in UINT8
|
|
PValueLength The length of P in bytes
|
|
PrivateKey Private key
|
|
PrivateKeyLength The length of Private key in bytes
|
|
|
|
Return Value:
|
|
SecretKey Secret key
|
|
SecretKeyLength The length of secret key in bytes
|
|
|
|
Note:
|
|
Reference to RFC2631
|
|
SecretKey = PublicKey^PrivateKey (mod P)
|
|
========================================================================
|
|
*/
|
|
void DH_SecretKey_Generate (
|
|
IN UINT8 PublicKey[],
|
|
IN UINT PublicKeyLength,
|
|
IN UINT8 PValue[],
|
|
IN UINT PValueLength,
|
|
IN UINT8 PrivateKey[],
|
|
IN UINT PrivateKeyLength,
|
|
OUT UINT8 SecretKey[],
|
|
INOUT UINT *SecretKeyLength)
|
|
{
|
|
PBIG_INTEGER pBI_P = NULL;
|
|
PBIG_INTEGER pBI_SecretKey = NULL;
|
|
PBIG_INTEGER pBI_PrivateKey = NULL;
|
|
PBIG_INTEGER pBI_PublicKey = NULL;
|
|
|
|
/*
|
|
* 1. Check the input parameters
|
|
* - PublicKeyLength, PValueLength and PrivateLength must be large than zero
|
|
* - SecretKeyLength must be large or equal than PValueLength
|
|
* - PValue must be odd
|
|
*
|
|
* - PValue must be prime number (no implement)
|
|
*/
|
|
if (PublicKeyLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: public key length is (%d)\n", PublicKeyLength));
|
|
return;
|
|
} /* End of if */
|
|
if (PValueLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P length is (%d)\n", PValueLength));
|
|
return;
|
|
} /* End of if */
|
|
if (PrivateKeyLength == 0) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: private key length is (%d)\n", PrivateKeyLength));
|
|
return;
|
|
} /* End of if */
|
|
if (*SecretKeyLength < PValueLength) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: secret key length(%d) must be large or equal than P length(%d)\n",
|
|
*SecretKeyLength, PValueLength));
|
|
return;
|
|
} /* End of if */
|
|
if (!(PValue[PValueLength - 1] & 0x1)) {
|
|
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P value must be odd\n"));
|
|
return;
|
|
} /* End of if */
|
|
|
|
/*
|
|
* 2. Transfer parameters to BigInteger structure
|
|
*/
|
|
BigInteger_Init(&pBI_P);
|
|
BigInteger_Init(&pBI_PrivateKey);
|
|
BigInteger_Init(&pBI_PublicKey);
|
|
BigInteger_Init(&pBI_SecretKey);
|
|
|
|
BigInteger_Bin2BI(PublicKey, PublicKeyLength, &pBI_PublicKey);
|
|
BigInteger_Bin2BI(PValue, PValueLength, &pBI_P);
|
|
BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey);
|
|
|
|
/*
|
|
* 3. Calculate SecretKey = PublicKey^PrivateKey (mod P)
|
|
* - BigInteger Operation
|
|
* - Montgomery reduction
|
|
*/
|
|
BigInteger_Montgomery_ExpMod(pBI_PublicKey, pBI_PrivateKey, pBI_P, &pBI_SecretKey);
|
|
|
|
/*
|
|
* 4. Transfer BigInteger structure to char array
|
|
*/
|
|
BigInteger_BI2Bin(pBI_SecretKey, SecretKey, SecretKeyLength);
|
|
|
|
BigInteger_Free(&pBI_P);
|
|
BigInteger_Free(&pBI_PrivateKey);
|
|
BigInteger_Free(&pBI_PublicKey);
|
|
BigInteger_Free(&pBI_SecretKey);
|
|
} /* End of DH_SecretKey_Generate */
|
|
|
|
#endif /* CRYPT_GPL_ALGORITHM */
|