/**************************************************************************** * Ralink Tech Inc. * Taiwan, R.O.C. * * (c) Copyright 2002, Ralink Technology, Inc. * * All rights reserved. Ralink's source code is an unpublished work and the * use of a copyright notice does not imply otherwise. This source code * contains confidential trade secret material of Ralink Tech. Any attemp * or participation in deciphering, decoding, reverse engineering or in any * way altering the source code is stricitly prohibited, unless the prior * written consent of Ralink Technology, Inc. is obtained. ***************************************************************************/ #ifndef CRYPT_GPL_ALGORITHM /**************************************************************************** Module Name: DH Abstract: RFC 2631: Diffie-Hellman Key Agreement Method Revision History: Who When What -------- ---------- ------------------------------------------ Eddy 2009/01/21 Create Diffie-Hellman ***************************************************************************/ #endif /* CRYPT_GPL_ALGORITHM */ #include "crypt_dh.h" #include "crypt_biginteger.h" #ifdef CRYPT_GPL_ALGORITHM /*dh_key.h START*/ #define GETPID_IS_MEANINGLESS #ifndef DHPRINT #define DHPRINT(fmt, args...) 
DBGPRINT(RT_DEBUG_OFF, (fmt, ## args)) #endif typedef struct bignum_st BIGNUM; typedef struct bn_mont_ctx_st BN_MONT_CTX; typedef struct env_md_st EVP_MD; typedef struct env_md_ctx_st EVP_MD_CTX; typedef struct dh_st DH; #define BN_MUL_COMBA #define BN_SQR_COMBA #define BN_ULLONG unsigned long #define BN_ULONG unsigned int #define BN_BITS 64 #define BN_BYTES 4 #define BN_BITS2 32 #define BN_BITS4 16 #define BN_MASK (0xffffffffffffffffLL) #define BN_MASK2 (0xffffffffL) #define BN_MASK2l (0xffff) #define BN_MASK2h1 (0xffff8000L) #define BN_MASK2h (0xffff0000L) #define BN_TBIT (0x80000000L) #define BN_FLG_MALLOCED 0x01 #define BN_FLG_STATIC_DATA 0x02 #define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */ #define BN_set_flags(b,n) ((b)->flags|=(n)) #define BN_get_flags(b,n) ((b)->flags&(n)) /* get a clone of a BIGNUM with changed flags, for *temporary* use only * (the two BIGNUMs cannot not be used in parallel!) */ #define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \ (dest)->top=(b)->top, \ (dest)->dmax=(b)->dmax, \ (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \ | ((b)->flags & ~BN_FLG_MALLOCED) \ | BN_FLG_STATIC_DATA \ | (n))) struct bignum_st { BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ int top; /* Index of last used d +1. */ /* The next are internal book keeping for bn_expand. */ int dmax; /* Size of the d array. 
*/ int flags; }; /* Used for montgomery multiplication */ struct bn_mont_ctx_st { int ri; /* number of bits in R */ BIGNUM RR; /* used to convert to montgomery form */ BIGNUM N; /* The modulus */ BN_ULONG n0; /* least significant word of Ni */ int flags; }; #define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) #define BN_is_zero(a) ((a)->top == 0) #define BN_is_one(a) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(1))) #define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) #define BN_one(a) (BN_set_word((a),1)) #define BN_zero(a) (BN_set_word((a),0)) #define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) #define bn_correct_top(a) \ { \ BN_ULONG *ftl; \ if ((a)->top > 0) \ { \ for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ if (*(ftl--)) break; \ } \ } /* * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions * * * For window size 'w' (w >= 2) and a random 'b' bits exponent, * the number of multiplications is a constant plus on average * * 2^(w-1) + (b-w)/(w+1); * * here 2^(w-1) is for precomputing the table (we actually need * entries only for windows that have the lowest bit set), and * (b-w)/(w+1) is an approximation for the expected number of * w-bit windows, not counting the first one. * * Thus we should use * * w >= 6 if b > 671 * w = 5 if 671 > b > 239 * w = 4 if 239 > b > 79 * w = 3 if 79 > b > 23 * w <= 2 if 23 > b * * (with draws in between). Very small exponents are often selected * with low Hamming weight, so we use w = 1 for b <= 23. */ #define BN_window_bits_for_exponent_size(b) \ ((b) > 671 ? 6 : \ (b) > 239 ? 5 : \ (b) > 79 ? 4 : \ (b) > 23 ? 3 : 1) /* BN_mod_exp_mont_conttime is based on the assumption that the * L1 data cache line width of the target processor is at least * the following value. 
*/ #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 ) #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) /* Window sizes optimized for fixed window size modular exponentiation * algorithm (BN_mod_exp_mont_consttime). * * To achieve the security goals of BN_mode_exp_mont_consttime, the * maximum size of the window must not exceed * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). * * Window size thresholds are defined for cache line sizes of 32 and 64, * cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A * window size of 7 should only be used on processors that have a 128 * byte or greater cache line size. */ #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 # define BN_window_bits_for_ctime_exponent_size(b) \ ((b) > 937 ? 6 : \ (b) > 306 ? 5 : \ (b) > 89 ? 4 : \ (b) > 22 ? 3 : 1) # define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 # define BN_window_bits_for_ctime_exponent_size(b) \ ((b) > 306 ? 5 : \ (b) > 89 ? 4 : \ (b) > 22 ? 
3 : 1) # define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) #endif #define BN_MULL_SIZE_NORMAL (16) /* 32 */ #define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */ #define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */ /************************************************************* * Using the long long type */ #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) /************************************************************* * No long long type */ #define LBITS(a) ((a)&BN_MASK2l) #define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) #define L2HBITS(a) (((a)<>BN_BITS2)&BN_MASKl) #define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<>(BN_BITS4-1); \ m =(m&BN_MASK2l)<<(BN_BITS4+1); \ l=(l+m)&BN_MASK2; if (l < m) h++; \ (lo)=l; \ (ho)=h; \ } #define mul_add(r,a,bl,bh,c) { \ BN_ULONG l,h; \ \ h= (a); \ l=LBITS(h); \ h=HBITS(h); \ mul64(l,h,(bl),(bh)); \ \ /* non-multiply part */ \ l=(l+(c))&BN_MASK2; if (l < (c)) h++; \ (c)=(r); \ l=(l+(c))&BN_MASK2; if (l < (c)) h++; \ (c)=h&BN_MASK2; \ (r)=l; \ } #define mul(r,a,bl,bh,c) { \ BN_ULONG l,h; \ \ h= (a); \ l=LBITS(h); \ h=HBITS(h); \ mul64(l,h,(bl),(bh)); \ \ /* non-multiply part */ \ l+=(c); if ((l&BN_MASK2) < (c)) h++; \ (c)=h&BN_MASK2; \ (r)=l&BN_MASK2; \ } #define INIT_DATA_h0 0x67452301UL #define INIT_DATA_h1 0xefcdab89UL #define INIT_DATA_h2 0x98badcfeUL #define INIT_DATA_h3 0x10325476UL #define INIT_DATA_h4 0xc3d2e1f0UL #define STATE_SIZE 1023 #define EXP_TABLE_SIZE 32 #define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx)) #define BN_CTX_POOL_SIZE 16 #define MOD_EXP_CTIME_ALIGN(x_) ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) #define ENTROPY_NEEDED 32 /* require 256 bits = 32 bytes of randomness */ #define DH_FLAG_CACHE_MONT_P 0x01 #define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH * implementation now uses constant time * modular exponentiation for secret exponents * by default. 
This flag causes the * faster variable sliding window method to * be used for all exponents. */ #define OPENSSL_DH_MAX_MODULUS_BITS 10000 struct dh_st { BIGNUM *p; BIGNUM *g; BIGNUM *pub_key; /* g^x */ BIGNUM *priv_key; /* x */ int flags; }; struct env_md_st { int type; int md_size; unsigned long flags; int (*init)(EVP_MD_CTX *ctx); int (*update)(EVP_MD_CTX *ctx,const void *data,size_t count); int (*final)(EVP_MD_CTX *ctx,unsigned char *md); int ctx_size; /* how big does the ctx->md_data need to be */ } /* EVP_MD */; struct env_md_ctx_st { const EVP_MD *digest; void *md_data; } /* EVP_MD_CTX */; /* A bundle of bignums that can be linked with other bundles */ typedef struct bignum_pool_item { /* The bignum values */ BIGNUM vals[BN_CTX_POOL_SIZE]; /* Linked-list admin */ struct bignum_pool_item *prev, *next; } BN_POOL_ITEM; /* A linked-list of bignums grouped in bundles */ typedef struct bignum_pool { /* Linked-list admin */ BN_POOL_ITEM *head, *currentvar, *tail; /* Stack depth and allocation size */ unsigned used, size; } BN_POOL; /*dh_key.h END*/ /*sha_locl.h START*/ #define SHA_LONG unsigned int #define SHA_LBLOCK 16 #define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a contiguous array of 32 bit wide big-endian values. 
*/ #define SHA_DIGEST_LENGTH 20 typedef struct SHAstate_st { SHA_LONG h0,h1,h2,h3,h4; SHA_LONG Nl,Nh; SHA_LONG data[SHA_LBLOCK]; unsigned int num; } OPEN_SHA_CTX; #define Xupdate(a,ix,ia,ib,ic,id) ((a)=(ia^ib^ic^id), ix=(a)=ROTATE((a),1)) #ifndef ROTATE #define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) #endif #define HASH_MAKE_STRING(c,s) do { \ unsigned long ll; \ ll=(c)->h0; HOST_l2c(ll,(s)); \ ll=(c)->h1; HOST_l2c(ll,(s)); \ ll=(c)->h2; HOST_l2c(ll,(s)); \ ll=(c)->h3; HOST_l2c(ll,(s)); \ ll=(c)->h4; HOST_l2c(ll,(s)); \ } while (0) #ifndef HOST_c2l #define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \ l|=(((unsigned long)(*((c)++)))<<16), \ l|=(((unsigned long)(*((c)++)))<< 8), \ l|=(((unsigned long)(*((c)++))) ), \ l) #endif #define HOST_p_c2l(c,l,n) { \ switch (n) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ case 3: l|=((unsigned long)(*((c)++))); \ } } #define HOST_p_c2l_p(c,l,sc,len) { \ switch (sc) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ if (--len == 0) break; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ if (--len == 0) break; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ } } /* NOTE the pointer is not incremented at the end of this */ #define HOST_c2l_p(c,l,n) { \ l=0; (c)+=n; \ switch (n) { \ case 3: l =((unsigned long)(*(--(c))))<< 8; \ case 2: l|=((unsigned long)(*(--(c))))<<16; \ case 1: l|=((unsigned long)(*(--(c))))<<24; \ } } #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \ *((c)++)=(unsigned char)(((l)>>16)&0xff), \ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ *((c)++)=(unsigned char)(((l) )&0xff), \ l) #endif #define K_00_19 0x5a827999UL #define K_20_39 0x6ed9eba1UL #define K_40_59 0x8f1bbcdcUL #define K_60_79 0xca62c1d6UL /* As pointed out by Wei Dai , F() below can be * simplified to the code in F_00_19. 
Wei attributes these optimisations * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel. * #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) * I've just become aware of another tweak to be made, again from Wei Dai, * in F_40_59, (x&a)|(y&a) -> (x|y)&a */ #define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d)) #define F_20_39(b,c,d) ((b) ^ (c) ^ (d)) #define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) #define F_60_79(b,c,d) F_20_39(b,c,d) #define BODY_00_15(xi) do { \ T=E+K_00_19+F_00_19(B,C,D); \ E=D, D=C, C=ROTATE(B,30), B=A; \ A=ROTATE(A,5)+T+xi; } while(0) #define BODY_16_19(xa,xb,xc,xd) do { \ Xupdate(T,xa,xa,xb,xc,xd); \ T+=E+K_00_19+F_00_19(B,C,D); \ E=D, D=C, C=ROTATE(B,30), B=A; \ A=ROTATE(A,5)+T; } while(0) #define BODY_20_39(xa,xb,xc,xd) do { \ Xupdate(T,xa,xa,xb,xc,xd); \ T+=E+K_20_39+F_20_39(B,C,D); \ E=D, D=C, C=ROTATE(B,30), B=A; \ A=ROTATE(A,5)+T; } while(0) #define BODY_40_59(xa,xb,xc,xd) do { \ Xupdate(T,xa,xa,xb,xc,xd); \ T+=E+K_40_59+F_40_59(B,C,D); \ E=D, D=C, C=ROTATE(B,30), B=A; \ A=ROTATE(A,5)+T; } while(0) #define BODY_60_79(xa,xb,xc,xd) do { \ Xupdate(T,xa,xa,xb,xc,xd); \ T=E+K_60_79+F_60_79(B,C,D); \ E=D, D=C, C=ROTATE(B,30), B=A; \ A=ROTATE(A,5)+T+xa; } while(0) static void sha1_block_host_order (OPEN_SHA_CTX *c, const void *d, size_t num) { const SHA_LONG *W=d; register unsigned long A,B,C,D,E,T; int i; SHA_LONG X[16]; A=c->h0; B=c->h1; C=c->h2; D=c->h3; E=c->h4; for (;;) { for (i=0;i<16;i++) { X[i]=W[i]; BODY_00_15(X[i]); } for (i=0;i<4;i++) { BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); } for (;i<24;i++) { BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); } for (i=0;i<20;i++) { BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } for (i=4;i<24;i++) { BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } c->h0=(c->h0+A)&0xffffffffL; c->h1=(c->h1+B)&0xffffffffL; c->h2=(c->h2+C)&0xffffffffL; c->h3=(c->h3+D)&0xffffffffL; c->h4=(c->h4+E)&0xffffffffL; if (--num == 0) break; A=c->h0; B=c->h1; 
C=c->h2; D=c->h3; E=c->h4; W+=SHA_LBLOCK; } } static void sha1_block_data_order (OPEN_SHA_CTX *c, const void *p, size_t num) { const unsigned char *data=p; register unsigned long A,B,C,D,E,T,l; int i; SHA_LONG X[16]; A=c->h0; B=c->h1; C=c->h2; D=c->h3; E=c->h4; for (;;) { for (i=0;i<16;i++) { HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); } for (i=0;i<4;i++) { BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); } for (;i<24;i++) { BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); } for (i=0;i<20;i++) { BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } for (i=4;i<24;i++) { BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } c->h0=(c->h0+A)&0xffffffffL; c->h1=(c->h1+B)&0xffffffffL; c->h2=(c->h2+C)&0xffffffffL; c->h3=(c->h3+D)&0xffffffffL; c->h4=(c->h4+E)&0xffffffffL; if (--num == 0) break; A=c->h0; B=c->h1; C=c->h2; D=c->h3; E=c->h4; } } /*sha_locl.h END*/ #define NID_sha1 64 static int g_state_num=0,g_state_index=0; static unsigned char g_md[SHA_DIGEST_LENGTH]; static unsigned char g_state[STATE_SIZE+SHA_DIGEST_LENGTH]; static long g_md_count[2]={0,0}; static long g_entropy=0; static int RAND_initialized=0; static BIGNUM *bn_expand2(BIGNUM *b, int words); static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c); static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len); static int HASH_INIT (OPEN_SHA_CTX *c); static int RAND_poll(void); static int RAND_init(EVP_MD_CTX *ctx) { return HASH_INIT(ctx->md_data); } static int RAND_update(EVP_MD_CTX *ctx,const void *data,size_t count) { return HASH_UPDATE(ctx->md_data,data,count); } static int RAND_final(EVP_MD_CTX *ctx,unsigned char *md) { return HASH_FINAL(md,ctx->md_data); } static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); #ifdef BN_RECURSION static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, int dnb, BN_ULONG *t); static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, int tnb, 
BN_ULONG *t); static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); #endif /* BN_RECURSION */ static int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont); static int BN_rshift1(BIGNUM *r, const BIGNUM *a); static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n); static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n); static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); static void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c) { register SHA_LONG *p; register unsigned long l; register int i,j; static const unsigned char end[4]={0x80,0x00,0x00,0x00}; const unsigned char *cp=end; /* c->num should definitly have room for at least one more byte. */ p=c->data; i=c->num>>2; j=c->num&0x03; l = (j==0) ? 0 : p[i]; HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */ if (i>(SHA_LBLOCK-2)) /* save room for Nl and Nh */ { if (iNh; p[SHA_LBLOCK-1]=c->Nl; sha1_block_host_order (c,p,1); #ifndef HASH_MAKE_STRING #error "HASH_MAKE_STRING must be defined!" #else HASH_MAKE_STRING(c,md); #endif c->num=0; return 1; } static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len) { const unsigned char *data=data_; register SHA_LONG * p; register SHA_LONG l; size_t sw,sc,ew,ec; if (len==0) return 1; l=(c->Nl+(((SHA_LONG)len)<<3))&0xffffffffUL; /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to * Wei Dai for pointing it out. 
*/ if (l < c->Nl) /* overflow */ c->Nh++; c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */ c->Nl=l; if (c->num != 0) { p=c->data; sw=c->num>>2; sc=c->num&0x03; if ((c->num+len) >= SHA_CBLOCK) { l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; swnum); c->num=0; /* drop through and do the rest */ } else { c->num+=(unsigned int)len; if ((sc+len) < 4) /* ugly, add char's to a word */ { l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l; } else { ew=(c->num>>2); ec=(c->num&0x03); if (sc) l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; sw < ew; sw++) { HOST_c2l(data,l); p[sw]=l; } if (ec) { HOST_c2l_p(data,l,ec); p[sw]=l; } } return 1; } } sw=len/SHA_CBLOCK; if (sw > 0) { { sha1_block_data_order(c,data,sw); sw*=SHA_CBLOCK; data+=sw; len-=sw; } } if (len!=0) { p = c->data; c->num = len; ew=len>>2; /* words to copy */ ec=len&0x03; for (; ew; ew--,p++) { HOST_c2l(data,l); *p=l; } HOST_c2l_p(data,l,ec); *p=l; } return 1; } static int HASH_INIT (OPEN_SHA_CTX *c) { c->h0=INIT_DATA_h0; c->h1=INIT_DATA_h1; c->h2=INIT_DATA_h2; c->h3=INIT_DATA_h3; c->h4=INIT_DATA_h4; c->Nl=0; c->Nh=0; c->num=0; return 1; } static BN_POOL *BN_CTX_new(void) { BN_POOL *ret; os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BN_POOL)); if(!ret) { return NULL; } /* Initialise the structure */ ret->head = ret->currentvar = ret->tail = NULL; ret->used = ret->size = 0; return ret; } static const BIGNUM *BN_value_one(void) { static BN_ULONG data_one=1L; static BIGNUM const_one={&data_one,1,1,BN_FLG_STATIC_DATA}; return(&const_one); } static int BN_num_bits_word(BN_ULONG l) { static const char bits[256]={ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, }; { if (l & 0xffff0000L) { if (l & 0xff000000L) return(bits[(int)(l>>24L)]+24); else return(bits[(int)(l>>16L)]+16); } else { if (l & 0xff00L) return(bits[(int)(l>>8)]+8); else return(bits[(int)(l )] ); } } } static int BN_num_bits(const BIGNUM *a) { int i = a->top - 1; if (BN_is_zero(a)) return 0; return ((i*BN_BITS2) + BN_num_bits_word(a->d[i])); } static void BN_clear_free(BIGNUM *a) { int i; if (a == NULL) return; if (a->d != NULL) { if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) os_free_mem(NULL, a->d); } i=BN_get_flags(a,BN_FLG_MALLOCED); if (i) os_free_mem(NULL, a); } static void BN_free(BIGNUM *a) { if (a == NULL) return; if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) os_free_mem(NULL, a->d); if (a->flags & BN_FLG_MALLOCED) os_free_mem(NULL, a); else { a->d = NULL; } } static void BN_init(BIGNUM *a) { memset(a,0,sizeof(BIGNUM)); } static BIGNUM *BN_new(void) { BIGNUM *ret; os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BIGNUM)); if (ret == NULL) { return(NULL); } ret->flags=BN_FLG_MALLOCED; ret->top=0; ret->dmax=0; ret->d=NULL; return(ret); } /* This is used both by bn_expand2() and bn_dup_expand() */ /* The caller MUST check that words > b->dmax before calling this */ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) { BN_ULONG *A,*a = NULL; const BN_ULONG *B; int i; if (words > (INT_MAX/(4*BN_BITS2))) { return NULL; } if (BN_get_flags(b,BN_FLG_STATIC_DATA)) { return(NULL); } os_alloc_mem(NULL, (UCHAR **)&A, sizeof(BN_ULONG)*words); a = A; if (A == NULL) { return(NULL); } B=b->d; /* Check if the previous number needs to be copied */ if (B != NULL) { for (i=b->top>>2; i>0; i--,A+=4,B+=4) { /* * The fact that the loop is unrolled * 4-wise is a tribute to Intel. It's * the one that doesn't have enough * registers to accomodate more data. 
* I'd unroll it 8-wise otherwise:-) * * */ BN_ULONG a0,a1,a2,a3; a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3]; A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3; } switch (b->top&3) { case 3: A[2]=B[2]; case 2: A[1]=B[1]; case 1: A[0]=B[0]; case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does * the switch table by doing a=top&3; a--; goto jump_table[a]; * which fails for top== 0 */ ; } } return(a); } /* This is an internal function that should not be used in applications. * It ensures that 'b' has enough room for a 'words' word number * and initialises any unused part of b->d with leading zeros. * It is mostly used by the various BIGNUM routines. If there is an error, * NULL is returned. If not, 'b' is returned. */ static BIGNUM *bn_expand2(BIGNUM *b, int words) { if (words > b->dmax) { BN_ULONG *a = bn_expand_internal(b, words); if(!a) return NULL; if(b->d) os_free_mem(NULL, b->d); b->d=a; b->dmax=words; } return b; } static void BN_clear(BIGNUM *a) { if (a->d != NULL) memset(a->d,0,a->dmax*sizeof(a->d[0])); a->top=0; } static int BN_set_word(BIGNUM *a, BN_ULONG w) { if (NULL == ((1 <= (a)->dmax)?(a):bn_expand2((a),1))) { DHPRINT("BN_set_word NULL\n"); return(0); } a->d[0] = w; a->top = (w ? 
1 : 0); return(1); } static BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) { unsigned int i,m; unsigned int n; BN_ULONG l; BIGNUM *bn = NULL; if (ret == NULL) ret = bn = BN_new(); if (ret == NULL) return(NULL); l=0; n=len; if (n == 0) { ret->top=0; return(ret); } i=((n-1)/BN_BYTES)+1; m=((n-1)%(BN_BYTES)); if (bn_wexpand(ret, (int)i) == NULL) { BN_free(bn); return NULL; } ret->top=i; while (n--) { l=(l<<8L)| *(s++); if (m-- == 0) { ret->d[--i]=l; l=0; m=BN_BYTES-1; } } /* need to call this due to clear byte at top if avoiding * having the top bit set (-ve number) */ bn_correct_top(ret); return(ret); } static int BN_ucmp(const BIGNUM *a, const BIGNUM *b) { int i; BN_ULONG t1,t2,*ap,*bp; i=a->top-b->top; if (i != 0) return(i); ap=a->d; bp=b->d; for (i=a->top-1; i>=0; i--) { t1= ap[i]; t2= bp[i]; if (t1 != t2) return((t1 > t2) ? 1 : -1); } return(0); } /* ignore negative */ static int BN_bn2bin(const BIGNUM *a, unsigned char *to) { int n,i; BN_ULONG l; n=i=BN_num_bytes(a); while (i--) { l=a->d[i/BN_BYTES]; *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff; } return(n); } #ifdef BN_RECURSION static int BN_cmp(const BIGNUM *a, const BIGNUM *b) { int i; int gt,lt; BN_ULONG t1,t2; if ((a == NULL) || (b == NULL)) { if (a != NULL) return(-1); else if (b != NULL) return(1); else return(0); } { gt=1; lt= -1; } if (a->top > b->top) return(gt); if (a->top < b->top) return(lt); for (i=a->top-1; i>=0; i--) { t1=a->d[i]; t2=b->d[i]; if (t1 > t2) return(gt); if (t1 < t2) return(lt); } return(0); } #endif /*BN_RECURSION */ static int BN_set_bit(BIGNUM *a, int n) { int i,j,k; if (n < 0) return 0; i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) { if (bn_wexpand(a,i+1) == NULL) return(0); for(k=a->top; kd[k]=0; a->top=i+1; } a->d[i]|=(((BN_ULONG)1)<top <= i) return 0; return((a->d[i]&(((BN_ULONG)1)< bb)?1:-1); for (i=n-2; i>=0; i--) { aa=a[i]; bb=b[i]; if (aa != bb) return((aa > bb)?1:-1); } return(0); } /* Here follows a specialised variants of bn_cmp_words(). 
It has the property of performing the operation on arrays of different sizes. The sizes of those arrays is expressed through cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, which is the delta between the two lengths, calculated as len(a)-len(b). All lengths are the number of BN_ULONGs... */ static int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl) { int n,i; n = cl-1; if (dl < 0) { for (i=dl; i<0; i++) { if (b[n-i] != 0) return -1; /* a < b */ } } if (dl > 0) { for (i=dl; i>0; i--) { if (a[n+i] != 0) return 1; /* a > b */ } } return bn_cmp_words(a,b,cl); } #endif /*BN_RECURSION */ static int BN_rshift1(BIGNUM *r, const BIGNUM *a) { BN_ULONG *ap,*rp,t,c; int i; if (BN_is_zero(a)) { BN_zero(r); return(1); } if (a != r) { if (bn_wexpand(r,a->top) == NULL) return(0); r->top=a->top; } ap=a->d; rp=r->d; c=0; for (i=a->top-1; i>=0; i--) { t=ap[i]; rp[i]=((t>>1)&BN_MASK2)|c; c=(t&1)?BN_TBIT:0; } bn_correct_top(r); return(1); } static int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) { int i,nw,lb,rb; BN_ULONG *t,*f; BN_ULONG l; nw=n/BN_BITS2; if (bn_wexpand(r,a->top+nw+1) == NULL) return(0); lb=n%BN_BITS2; rb=BN_BITS2-lb; f=a->d; t=r->d; t[a->top+nw]=0; if (lb == 0) for (i=a->top-1; i>=0; i--) t[nw+i]=f[i]; else for (i=a->top-1; i>=0; i--) { l=f[i]; t[nw+i+1]|=(l>>rb)&BN_MASK2; t[nw+i]=(l<top=a->top+nw+1; bn_correct_top(r); return(1); } static int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) { int i,j,nw,lb,rb; BN_ULONG *t,*f; BN_ULONG l,tmp; nw=n/BN_BITS2; rb=n%BN_BITS2; lb=BN_BITS2-rb; if (nw > a->top || a->top == 0) { BN_zero(r); return(1); } if (r != a) { if (bn_wexpand(r,a->top-nw+1) == NULL) return(0); } else { if (n == 0) return 1; /* or the copying loop will go berserk */ } f= &(a->d[nw]); t=r->d; j=a->top-nw; r->top=j; if (rb == 0) { for (i=j; i != 0; i--) *(t++)= *(f++); } else { l= *(f++); for (i=j-1; i != 0; i--) { tmp =(l>>rb)&BN_MASK2; l= *(f++); *(t++) =(tmp|(l<>rb)&BN_MASK2; } bn_correct_top(r); return(1); } 
/*
 * Give the `num` most recently issued bignums back to the pool.
 *
 * Only the bookkeeping changes: p->used drops by `num`, and p->currentvar is
 * stepped back to the previous bundle each time the walk passes slot 0 of a
 * bundle.  Nothing is freed.  There is no check that num <= p->used; both are
 * unsigned, so a larger `num` would wrap p->used.
 */
static void BN_POOL_release(BN_POOL *p, unsigned int num)
{
	/* Slot index, inside its bundle, of the most recently issued bignum. */
	unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;

	p->used -= num;
	while(num--)
	{
		if(!offset)
		{
			/* Passed the start of a bundle: continue in the previous one. */
			offset = BN_CTX_POOL_SIZE - 1;
			p->currentvar = p->currentvar->prev;
		}
		else
			offset--;
	}
}

/*
 * Free every bundle owned by the pool.
 *
 * For each of the BN_CTX_POOL_SIZE bignums in a bundle whose word array `d`
 * is non-NULL, BN_clear_free() is called; then the bundle itself is freed.
 * On return p->head and p->currentvar are NULL.  p->tail, p->used and p->size
 * are not reset, and `p` itself is not freed.
 */
static void BN_POOL_finish(BN_POOL *p)
{
	while(p->head)
	{
		BIGNUM *bn=p->head->vals;
		unsigned int loop = 0;

		while(loop++ < BN_CTX_POOL_SIZE)
		{
			BIGNUM *tmp = bn; /* avoid compile warning: use freed pointer */

			if(bn->d)
				BN_clear_free(tmp);
			bn++;
		}
		/* Remember the successor before the current bundle is released. */
		p->currentvar = p->head->next;
		os_free_mem(NULL, p->head);
		p->head = p->currentvar;
	}
}

/*
 * Hand out the next free bignum of the pool, set to zero.
 *
 * When every slot allocated so far is in use (p->used == p->size), a new
 * bundle of BN_CTX_POOL_SIZE bignums is allocated, initialised with BN_init(),
 * appended to the list, and its first slot is returned.  Otherwise the next
 * slot of an already allocated bundle is reused.
 *
 * Returns the bignum, or NULL (after logging) when a new bundle is needed
 * but cannot be allocated.  The result of BN_zero() is not checked.
 */
static BIGNUM *BN_POOL_get(BN_POOL *p)
{
	BIGNUM *ret=NULL;

	if(p->used == p->size)
	{
		BIGNUM *bn;
		unsigned int loop = 0;
		/* Start from NULL so the failure test below does not rely on
		 * os_alloc_mem() writing the pointer when it fails. */
		BN_POOL_ITEM *item = NULL;

		os_alloc_mem(NULL, (UCHAR **)&item, sizeof(BN_POOL_ITEM));
		if(!item)
			goto end;

		/* Initialise the structure */
		bn = item->vals;
		while(loop++ < BN_CTX_POOL_SIZE)
			BN_init(bn++);
		item->prev = p->tail;
		item->next = NULL;

		/* Link it in */
		if(!p->head)
			p->head = p->currentvar = p->tail = item;
		else
		{
			p->tail->next = item;
			p->tail = item;
			p->currentvar = item;
		}
		p->size += BN_CTX_POOL_SIZE;
		p->used++;

		/* Return the first bignum from the new pool */
		ret = item->vals;
		/*
		 * Stop here.  Falling through into the reuse path below would
		 * replace `ret` with slot 1 of the new bundle and count p->used
		 * a second time for this single request, so slot 0 would never
		 * be issued and BN_POOL_release() could not balance p->used.
		 */
		goto end;
	}

	if(!p->used)
		/* Pool fully released earlier: restart from the first bundle. */
		p->currentvar = p->head;
	else if((p->used % BN_CTX_POOL_SIZE) == 0)
		/* Current bundle is full: move on to the next one. */
		p->currentvar = p->currentvar->next;

	ret = p->currentvar->vals + ((p->used) % BN_CTX_POOL_SIZE);
	p->used++;

end:
	if(ret == NULL)
	{
		DHPRINT("BN_POOL_get ERROR !!!");
	}
	else
	{
		/* OK, make sure the returned bignum is "zero" */
		BN_zero(ret);
	}
	return ret;
}

/*
 * Copy the value of `b` into `a`.
 *
 * Returns `a` at once when BN_ucmp() reports the two magnitudes as equal.
 * Otherwise `a` is grown to hold b->top words (NULL is returned if that
 * fails) and the words are copied four at a time, with the 0-3 left-over
 * words handled by the switch.
 */
static BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
{
	int i;
	BN_ULONG *A;
	const BN_ULONG *B;

	if (0 == BN_ucmp(a,b))
		return(a);
	if (bn_wexpand(a,b->top) == NULL)
		return(NULL);

	A=a->d;
	B=b->d;
	for (i=b->top>>2; i>0; i--,A+=4,B+=4)
	{
		BN_ULONG a0,a1,a2,a3;

		a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
		A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
	}
	switch (b->top&3)
	{
		case 3: A[2]=B[2]; /* fall through */
		case 2: A[1]=B[1]; /* fall through */
		case 1: A[0]=B[0]; /* fall through */
		case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
	}
a->top=b->top; return(a); } /* Divide h,l by d and return the result. */ /* I need to test this some more :-( */ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { BN_ULONG dh,dl,q,ret=0,th,tl,t; int i,count=2; if (d == 0) return(BN_MASK2); i=BN_num_bits_word(d); i=BN_BITS2-i; if (h >= d) h-=d; if (i) { d<<=i; h=(h<>(BN_BITS2-i)); l<<=i; } dh=(d&BN_MASK2h)>>BN_BITS4; dl=(d&BN_MASK2l); for (;;) { if ((h>>BN_BITS4) == dh) q=BN_MASK2l; else q=h/dh; th=q*dh; tl=dl*q; for (;;) { t=h-th; if ((t&BN_MASK2h) || ((tl) <= ( (t<>BN_BITS4)))) break; q--; th-=dh; tl-=dl; } t=(tl>>BN_BITS4); tl=(tl<>BN_BITS4))&BN_MASK2; l=(l&BN_MASK2l)<top; num_n=snum->top; loop=num_n-div_n; /* Lets setup a 'window' into snum * This is the part that corresponds to the current * 'area' being divided */ wnum.d = &(snum->d[loop]); wnum.top = div_n; /* only needed when BN_ucmp messes up the values between top and max */ wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ /* Get the top 2 words of sdiv */ /* div_n=sdiv->top; */ d0=sdiv->d[div_n-1]; d1=(div_n == 1)?0:sdiv->d[div_n-2]; /* pointer to the 'top' of snum */ wnump= &(snum->d[num_n-1]); /* Setup to 'res' */ if (!bn_wexpand(res,(loop+1))) goto err; res->top=loop; resp= &(res->d[loop-1]); /* space for temp */ if (!bn_wexpand(tmp,(div_n+1))) goto err; if (BN_ucmp(&wnum,sdiv) >= 0) { bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); *resp=1; } else res->top--; /* if res->top == 0 then clear the neg value otherwise decrease * the resp pointer */ if (res->top != 0) resp--; for (i=0; id,sdiv->d,div_n,q); tmp->d[div_n]=l0; wnum.d--; /* ingore top values of the bignums just sub the two * BN_ULONG arrays with bn_sub_words */ if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1)) { /* Note: As we have considered only the leading * two BN_ULONGs in the calculation of q, sdiv * q * might be greater than wnum (but then (q-1) * sdiv * is less or equal than wnum) */ q--; if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) /* we can't have an 
overflow here (assuming * that q != 0, but if q == 0 then tmp is * zero anyway) */ (*wnump)++; } /* store part of the result */ *resp = q; } bn_correct_top(snum); if (rm != NULL) { /* Keep a copy of the neg flag in num because if rm==num * BN_rshift() will overwrite it. */ BN_rshift(rm,snum,norm_shift); } BN_POOL_release(ctx,release); return(1); err: BN_POOL_release(ctx,release); return(0); } /*bn_div END*/ /*bn_asm START*/ static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c=0; BN_ULONG bl,bh; if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); bh=HBITS(w); for (;;) { mul_add(rp[0],ap[0],bl,bh,c); if (--num == 0) break; mul_add(rp[1],ap[1],bl,bh,c); if (--num == 0) break; mul_add(rp[2],ap[2],bl,bh,c); if (--num == 0) break; mul_add(rp[3],ap[3],bl,bh,c); if (--num == 0) break; ap+=4; rp+=4; } return(c); } static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG carry=0; BN_ULONG bl,bh; if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); bh=HBITS(w); for (;;) { mul(rp[0],ap[0],bl,bh,carry); if (--num == 0) break; mul(rp[1],ap[1],bl,bh,carry); if (--num == 0) break; mul(rp[2],ap[2],bl,bh,carry); if (--num == 0) break; mul(rp[3],ap[3],bl,bh,carry); if (--num == 0) break; ap+=4; rp+=4; } return(carry); } static void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { if (n <= 0) return; for (;;) { sqr64(r[0],r[1],a[0]); if (--n == 0) break; sqr64(r[2],r[3],a[1]); if (--n == 0) break; sqr64(r[4],r[5],a[2]); if (--n == 0) break; sqr64(r[6],r[7],a[3]); if (--n == 0) break; a+=4; r+=8; } } static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG c,l,t; if (n <= 0) return((BN_ULONG)0); c=0; for (;;) { t=a[0]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[0])&BN_MASK2; c+=(l < t); r[0]=l; if (--n <= 0) break; t=a[1]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[1])&BN_MASK2; c+=(l < t); r[1]=l; if (--n <= 0) break; t=a[2]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[2])&BN_MASK2; 
c+=(l < t); r[2]=l; if (--n <= 0) break; t=a[3]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[3])&BN_MASK2; c+=(l < t); r[3]=l; if (--n <= 0) break; a+=4; b+=4; r+=4; } return((BN_ULONG)c); } static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG t1,t2; int c=0; if (n <= 0) return((BN_ULONG)0); for (;;) { t1=a[0]; t2=b[0]; r[0]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); if (--n <= 0) break; t1=a[1]; t2=b[1]; r[1]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); if (--n <= 0) break; t1=a[2]; t2=b[2]; r[2]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); if (--n <= 0) break; t1=a[3]; t2=b[3]; r[3]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); if (--n <= 0) break; a+=4; b+=4; r+=4; } return(c); } #ifdef BN_MUL_COMBA #undef bn_mul_comba8 #undef bn_mul_comba4 #undef bn_sqr_comba8 #undef bn_sqr_comba4 /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ #define mul_add_c(a,b,c0,c1,c2) \ t1=LBITS(a); t2=HBITS(a); \ bl=LBITS(b); bh=HBITS(b); \ mul64(t1,t2,bl,bh); \ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; #define mul_add_c2(a,b,c0,c1,c2) \ t1=LBITS(a); t2=HBITS(a); \ bl=LBITS(b); bh=HBITS(b); \ mul64(t1,t2,bl,bh); \ if (t2 & BN_TBIT) c2++; \ t2=(t2+t2)&BN_MASK2; \ if (t1 & BN_TBIT) t2++; \ t1=(t1+t1)&BN_MASK2; \ c0=(c0+t1)&BN_MASK2; \ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; #define sqr_add_c(a,i,c0,c1,c2) \ sqr64(t1,t2,(a)[i]); \ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { BN_ULONG bl,bh; BN_ULONG t1,t2; 
BN_ULONG c1,c2,c3; c1=0; c2=0; c3=0; mul_add_c(a[0],b[0],c1,c2,c3); r[0]=c1; c1=0; mul_add_c(a[0],b[1],c2,c3,c1); mul_add_c(a[1],b[0],c2,c3,c1); r[1]=c2; c2=0; mul_add_c(a[2],b[0],c3,c1,c2); mul_add_c(a[1],b[1],c3,c1,c2); mul_add_c(a[0],b[2],c3,c1,c2); r[2]=c3; c3=0; mul_add_c(a[0],b[3],c1,c2,c3); mul_add_c(a[1],b[2],c1,c2,c3); mul_add_c(a[2],b[1],c1,c2,c3); mul_add_c(a[3],b[0],c1,c2,c3); r[3]=c1; c1=0; mul_add_c(a[4],b[0],c2,c3,c1); mul_add_c(a[3],b[1],c2,c3,c1); mul_add_c(a[2],b[2],c2,c3,c1); mul_add_c(a[1],b[3],c2,c3,c1); mul_add_c(a[0],b[4],c2,c3,c1); r[4]=c2; c2=0; mul_add_c(a[0],b[5],c3,c1,c2); mul_add_c(a[1],b[4],c3,c1,c2); mul_add_c(a[2],b[3],c3,c1,c2); mul_add_c(a[3],b[2],c3,c1,c2); mul_add_c(a[4],b[1],c3,c1,c2); mul_add_c(a[5],b[0],c3,c1,c2); r[5]=c3; c3=0; mul_add_c(a[6],b[0],c1,c2,c3); mul_add_c(a[5],b[1],c1,c2,c3); mul_add_c(a[4],b[2],c1,c2,c3); mul_add_c(a[3],b[3],c1,c2,c3); mul_add_c(a[2],b[4],c1,c2,c3); mul_add_c(a[1],b[5],c1,c2,c3); mul_add_c(a[0],b[6],c1,c2,c3); r[6]=c1; c1=0; mul_add_c(a[0],b[7],c2,c3,c1); mul_add_c(a[1],b[6],c2,c3,c1); mul_add_c(a[2],b[5],c2,c3,c1); mul_add_c(a[3],b[4],c2,c3,c1); mul_add_c(a[4],b[3],c2,c3,c1); mul_add_c(a[5],b[2],c2,c3,c1); mul_add_c(a[6],b[1],c2,c3,c1); mul_add_c(a[7],b[0],c2,c3,c1); r[7]=c2; c2=0; mul_add_c(a[7],b[1],c3,c1,c2); mul_add_c(a[6],b[2],c3,c1,c2); mul_add_c(a[5],b[3],c3,c1,c2); mul_add_c(a[4],b[4],c3,c1,c2); mul_add_c(a[3],b[5],c3,c1,c2); mul_add_c(a[2],b[6],c3,c1,c2); mul_add_c(a[1],b[7],c3,c1,c2); r[8]=c3; c3=0; mul_add_c(a[2],b[7],c1,c2,c3); mul_add_c(a[3],b[6],c1,c2,c3); mul_add_c(a[4],b[5],c1,c2,c3); mul_add_c(a[5],b[4],c1,c2,c3); mul_add_c(a[6],b[3],c1,c2,c3); mul_add_c(a[7],b[2],c1,c2,c3); r[9]=c1; c1=0; mul_add_c(a[7],b[3],c2,c3,c1); mul_add_c(a[6],b[4],c2,c3,c1); mul_add_c(a[5],b[5],c2,c3,c1); mul_add_c(a[4],b[6],c2,c3,c1); mul_add_c(a[3],b[7],c2,c3,c1); r[10]=c2; c2=0; mul_add_c(a[4],b[7],c3,c1,c2); mul_add_c(a[5],b[6],c3,c1,c2); mul_add_c(a[6],b[5],c3,c1,c2); 
#ifdef BN_RECURSION
/*
 * bn_mul_comba4 - column-wise ("comba") product of two 4-word numbers.
 *
 * r: receives 8 words
 * a: 4 input words
 * b: 4 input words
 *
 * Output column k accumulates every a[i]*b[j] with i+j == k through
 * mul_add_c() into a three-word accumulator.  After each column the lowest
 * accumulator word is stored to r[k] and cleared, and the roles of
 * c1/c2/c3 rotate, so the statement order below must not be changed.
 * bl, bh, t1 and t2 are scratch variables used inside the mul_add_c() macro.
 */
static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0;
	c2=0;
	c3=0;
	/* column 0 */
	mul_add_c(a[0],b[0],c1,c2,c3);
	r[0]=c1;
	c1=0;
	/* column 1 */
	mul_add_c(a[0],b[1],c2,c3,c1);
	mul_add_c(a[1],b[0],c2,c3,c1);
	r[1]=c2;
	c2=0;
	/* column 2 */
	mul_add_c(a[2],b[0],c3,c1,c2);
	mul_add_c(a[1],b[1],c3,c1,c2);
	mul_add_c(a[0],b[2],c3,c1,c2);
	r[2]=c3;
	c3=0;
	/* column 3 */
	mul_add_c(a[0],b[3],c1,c2,c3);
	mul_add_c(a[1],b[2],c1,c2,c3);
	mul_add_c(a[2],b[1],c1,c2,c3);
	mul_add_c(a[3],b[0],c1,c2,c3);
	r[3]=c1;
	c1=0;
	/* column 4 */
	mul_add_c(a[3],b[1],c2,c3,c1);
	mul_add_c(a[2],b[2],c2,c3,c1);
	mul_add_c(a[1],b[3],c2,c3,c1);
	r[4]=c2;
	c2=0;
	/* column 5 */
	mul_add_c(a[2],b[3],c3,c1,c2);
	mul_add_c(a[3],b[2],c3,c1,c2);
	r[5]=c3;
	c3=0;
	/* column 6, plus the final carry word */
	mul_add_c(a[3],b[3],c1,c2,c3);
	r[6]=c1;
	r[7]=c2;
	}
#endif /*BN_RECURSION */
/*
 * bn_sqr_comba4 - column-wise ("comba") square of a 4-word number.
 *
 * r: receives 8 words
 * a: 4 input words
 *
 * Output column k accumulates the diagonal term a[i]^2 (sqr_add_c, when
 * 2*i == k) and the doubled cross terms 2*a[i]*a[j] (sqr_add_c2, for
 * i+j == k, i > j) into a three-word accumulator.  After each column the
 * lowest accumulator word is stored to r[k] and cleared, and the roles of
 * c1/c2/c3 rotate, so the statement order below must not be changed.
 * bl, bh, t1 and t2 are scratch variables used inside the macros.
 */
static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0;
	c2=0;
	c3=0;
	/* column 0 */
	sqr_add_c(a,0,c1,c2,c3);
	r[0]=c1;
	c1=0;
	/* column 1 */
	sqr_add_c2(a,1,0,c2,c3,c1);
	r[1]=c2;
	c2=0;
	/* column 2 */
	sqr_add_c(a,1,c3,c1,c2);
	sqr_add_c2(a,2,0,c3,c1,c2);
	r[2]=c3;
	c3=0;
	/* column 3 */
	sqr_add_c2(a,3,0,c1,c2,c3);
	sqr_add_c2(a,2,1,c1,c2,c3);
	r[3]=c1;
	c1=0;
	/* column 4 */
	sqr_add_c(a,2,c2,c3,c1);
	sqr_add_c2(a,3,1,c2,c3,c1);
	r[4]=c2;
	c2=0;
	/* column 5 */
	sqr_add_c2(a,3,2,c3,c1,c2);
	r[5]=c3;
	c3=0;
	/* column 6, plus the final carry word */
	sqr_add_c(a,3,c1,c2,c3);
	r[6]=c1;
	r[7]=c2;
	}
/*
 * bn_sub_part_words - r = a - b for word arrays of different lengths.
 *
 * r:  result array; must hold cl + abs(dl) words
 * a:  minuend words
 * b:  subtrahend words
 * cl: common length, i.e. the number of words present in both a and b
 * dl: length difference len(a) - len(b); negative when b is the longer one
 *
 * The first cl words are handled by bn_sub_words().  The remaining abs(dl)
 * words come from only one operand: for dl < 0 they are (0 - b[i] - borrow),
 * for dl > 0 they are (a[i] - borrow) while a borrow is pending and a plain
 * copy of a[i] afterwards.  All loops are unrolled four words at a time.
 *
 * Returns the final borrow.
 */
static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, int dl)
	{
	BN_ULONG c, t;

	c = bn_sub_words(r, a, b, cl);

	if (dl == 0)
		return c;

	/* step past the common part */
	r += cl;
	a += cl;
	b += cl;

	if (dl < 0)
		{
		/* only b has words left: r = 0 - b - borrow; once a non-zero
		 * word of b has been seen the borrow stays set */
		for (;;)
			{
			t = b[0];
			r[0] = (0-t-c)&BN_MASK2;
			if (t != 0) c=1;
			if (++dl >= 0) break;

			t = b[1];
			r[1] = (0-t-c)&BN_MASK2;
			if (t != 0) c=1;
			if (++dl >= 0) break;

			t = b[2];
			r[2] = (0-t-c)&BN_MASK2;
			if (t != 0) c=1;
			if (++dl >= 0) break;

			t = b[3];
			r[3] = (0-t-c)&BN_MASK2;
			if (t != 0) c=1;
			if (++dl >= 0) break;

			b += 4;
			r += 4;
			}
		}
	else
		{
		/* only a has words left; save_dl remembers dl at the start of the
		 * current group of four so the switch below can tell how far into
		 * the group the borrow loop got */
		int save_dl = dl;
		while(c)
			{
			/* the borrow is absorbed by the first non-zero word of a */
			t = a[0];
			r[0] = (t-c)&BN_MASK2;
			if (t != 0) c=0;
			if (--dl <= 0) break;

			t = a[1];
			r[1] = (t-c)&BN_MASK2;
			if (t != 0) c=0;
			if (--dl <= 0) break;

			t = a[2];
			r[2] = (t-c)&BN_MASK2;
			if (t != 0) c=0;
			if (--dl <= 0) break;

			t = a[3];
			r[3] = (t-c)&BN_MASK2;
			if (t != 0) c=0;
			if (--dl <= 0) break;

			save_dl = dl;
			a += 4;
			r += 4;
			}
		if (dl > 0)
			{
			if (save_dl > dl)
				{
				/* copy the rest of a partially processed group; the cases
				 * fall through into one another unless dl runs out */
				switch (save_dl - dl)
					{
				case 1:
					r[1] = a[1];
					if (--dl <= 0) break;
				case 2:
					r[2] = a[2];
					if (--dl <= 0) break;
				case 3:
					r[3] = a[3];
					if (--dl <= 0) break;
					}
				a += 4;
				r += 4;
				}
			}
		if (dl > 0)
			{
			/* no borrow pending any more: plain copy of what is left of a */
			for(;;)
				{
				r[0] = a[0];
				if (--dl <= 0) break;
				r[1] = a[1];
				if (--dl <= 0) break;
				r[2] = a[2];
				if (--dl <= 0) break;
				r[3] = a[3];
				if (--dl <= 0) break;

				a += 4;
				r += 4;
				}
			}
		}
	return c;
	}
/*
 * bn_mul_recursive - Karatsuba multiplication, r = a * b.
 *
 * r:   result, 2*n2 words
 * a,b: operands, nominally n2 words each (n2 a power of two)
 * n2:  operand size in words
 * dna: added to n (= n2/2) to give tna, the size passed for the upper half
 *      of a; also added to n2 for the schoolbook fallback
 * dnb: same as dna, for b
 * t:   scratch space; the recursive branch additionally uses the area
 *      starting at t[2*n2]
 *
 * With a = (a[1],a[0]) and b = (b[1],b[0]) split into halves of n words,
 * the three products computed are
 *     a[0]*b[0],   a[1]*b[1],   (a[0]-a[1])*(b[1]-b[0])
 * and the middle part of the result is their sum.
 */
static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
	int dna, int dnb, BN_ULONG *t)
	{
	int n=n2/2,c1,c2;
	int tna=n+dna, tnb=n+dnb;
	unsigned int neg,zero;
	BN_ULONG ln,lo,*p;

# ifdef BN_MUL_COMBA
	/* Only call bn_mul_comba 8 if n2 == 8 and the
	 * two arrays are complete [steve]
	 */
	if (n2 == 8 && dna == 0 && dnb == 0)
		{
		bn_mul_comba8(r,a,b);
		return;
		}
# endif /* BN_MUL_COMBA */
	/* Else do normal multiply */
	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
		{
		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
		/* zero the top words the shorter product did not reach */
		if ((dna + dnb) < 0)
			memset(&r[2*n2 + dna + dnb], 0,
				sizeof(BN_ULONG) * -(dna + dnb));
		return;
		}
	/* r=(a[0]-a[1])*(b[1]-b[0]) */
	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
	zero=neg=0;
	/* c1*3+c2 encodes the two comparison results in one value; each
	 * difference is formed larger-minus-smaller and 'neg' records whether
	 * the product of the true differences is negative */
	switch (c1*3+c2)
		{
	case -4:
		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
		break;
	case -3:
		zero=1;
		break;
	case -2:
		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
		neg=1;
		break;
	case -1:
	case 0:
	case 1:
		zero=1;
		break;
	case 2:
		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
		neg=1;
		break;
	case 3:
		zero=1;
		break;
	case 4:
		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
		break;
		}

# ifdef BN_MUL_COMBA
	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
					       extra args to do this well */
		{
		if (!zero)
			bn_mul_comba4(&(t[n2]),t,&(t[n]));
		else
			memset(&(t[n2]),0,8*sizeof(BN_ULONG));

		bn_mul_comba4(r,a,b);
		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
		}
	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
						    take extra args to do this
						    well */
		{
		if (!zero)
			bn_mul_comba8(&(t[n2]),t,&(t[n]));
		else
			memset(&(t[n2]),0,16*sizeof(BN_ULONG));

		bn_mul_comba8(r,a,b);
		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
		}
	else
# endif /* BN_MUL_COMBA */
		{
		/* scratch for the recursive calls starts after the 2*n2 words
		 * used at this level */
		p= &(t[n2*2]);
		if (!zero)
			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
		else
			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
		bn_mul_recursive(r,a,b,n,0,0,p);
		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
		}

	/* t[n2..]  holds |(a[0]-a[1])*(b[1]-b[0])|, 'neg' is its sign
	 * r[0..]   holds (a[0]*b[0])
	 * r[n2..]  holds (a[1]*b[1])
	 */

	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));

	if (neg) /* if the difference product is negative */
		{
		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
		}
	else
		{
		/* Might have a carry */
		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
		}

	/* t[n2..]  holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
	 * r[0..]   holds (a[0]*b[0])
	 * r[n2..]  holds (a[1]*b[1])
	 * c1 holds the carry bits
	 */
	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
	if (c1)
		{
		/* add the accumulated carry at word n+n2 and ripple it upwards */
		p= &(r[n+n2]);
		lo= *p;
		ln=(lo+c1)&BN_MASK2;
		*p=ln;

		/* The overflow will stop before we over write
		 * words we should not overwrite */
		if (ln < (BN_ULONG)c1)
			{
			do	{
				p++;
				lo= *p;
				ln=(lo+1)&BN_MASK2;
				*p=ln;
				} while (ln == 0);
			}
		}
	}
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); break; } if (n == 8) { bn_mul_comba8(&(t[n2]),t,&(t[n])); bn_mul_comba8(r,a,b); bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); } else { p= &(t[n2*2]); bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); bn_mul_recursive(r,a,b,n,0,0,p); i=n/2; /* If there is only a bottom half to the number, * just do it */ if (tna > tnb) j = tna - i; else j = tnb - i; if (j == 0) { bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), i,tna-i,tnb-i,p); memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); } else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ { bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), i,tna-i,tnb-i,p); memset(&(r[n2+tna+tnb]),0, sizeof(BN_ULONG)*(n2-tna-tnb)); } else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ { memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); } else { for (;;) { i/=2; if (i < tna && i < tnb) { bn_mul_part_recursive(&(r[n2]), &(a[n]),&(b[n]), i,tna-i,tnb-i,p); break; } else if (i <= tna && i <= tnb) { bn_mul_recursive(&(r[n2]), &(a[n]),&(b[n]), i,tna-i,tnb-i,p); break; } } } } } /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign * r[10] holds (a[0]*b[0]) * r[32] holds (b[1]*b[1]) */ c1=(int)(bn_add_words(t,r,&(r[n2]),n2)); if (neg) /* if t[32] is negative */ { c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2)); } else { /* Might have a carry */ c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2)); } /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) * r[10] holds (a[0]*b[0]) * r[32] holds (b[1]*b[1]) * c1 holds the carry bits */ c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2)); if (c1) { p= &(r[n+n2]); lo= *p; ln=(lo+c1)&BN_MASK2; *p=ln; /* The overflow will stop before we over write * words we should not overwrite */ if (ln < (BN_ULONG)c1) { do { p++; lo= *p; ln=(lo+1)&BN_MASK2; *p=ln; } while (ln == 0); } } } #endif /* 
BN_RECURSION */ static int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_POOL *ctx) { int release=0; int ret=0; int top,al,bl; BIGNUM *rr; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) int i; #endif #ifdef BN_RECURSION BIGNUM *t=NULL; int j=0,k; #endif al=a->top; bl=b->top; if ((al == 0) || (bl == 0)) { BN_zero(r); return(1); } top=al+bl; if ((r == a) || (r == b)) { if ((rr = BN_POOL_get(ctx)) == NULL) goto err; release++; } else rr = r; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) i = al-bl; #endif #ifdef BN_MUL_COMBA if (i == 0) { if (al == 8) { if (bn_wexpand(rr,16) == NULL) goto err; rr->top=16; bn_mul_comba8(rr->d,a->d,b->d); goto end; } } #endif /* BN_MUL_COMBA */ #ifdef BN_RECURSION if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { if (i >= -1 && i <= 1) { int sav_j =0; /* Find out the power of two lower or equal to the longest of the two numbers */ if (i >= 0) { j = BN_num_bits_word((BN_ULONG)al); } if (i == -1) { j = BN_num_bits_word((BN_ULONG)bl); } sav_j = j; j = 1<<(j-1); k = j+j; t = BN_POOL_get(ctx); release++; if (al > j || bl > j) { bn_wexpand(t,k*4); bn_wexpand(rr,k*4); bn_mul_part_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } else /* al <= j || bl <= j */ { bn_wexpand(t,k*2); bn_wexpand(rr,k*2); bn_mul_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } rr->top=top; goto end; } } #endif /* BN_RECURSION */ if (bn_wexpand(rr,top) == NULL) goto err; rr->top=top; bn_mul_normal(rr->d,a->d,al,b->d,bl); #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) end: #endif bn_correct_top(rr); if (r != rr) BN_copy(r,rr); ret=1; err: BN_POOL_release(ctx,release); return(ret); } static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) { BN_ULONG *rr; if (na < nb) { int itmp; BN_ULONG *ltmp; itmp=na; na=nb; nb=itmp; ltmp=a; a=b; b=ltmp; } rr= &(r[na]); if (nb <= 0) { (void)bn_mul_words(r,a,na,0); return; } else rr[0]=bn_mul_words(r,a,na,b[0]); for (;;) { if (--nb <= 0) return; 
rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]); if (--nb <= 0) return; rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]); if (--nb <= 0) return; rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]); if (--nb <= 0) return; rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]); rr+=4; r+=4; b+=4; } } /*bn_mul END*/ /* tmp must have 2*n words */ static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) { int i,j,max; const BN_ULONG *ap; BN_ULONG *rp; max=n*2; ap=a; rp=r; rp[0]=rp[max-1]=0; rp++; j=n; if (--j > 0) { ap++; rp[j]=bn_mul_words(rp,ap,j,ap[-1]); rp+=2; } for (i=n-2; i>0; i--) { j--; ap++; rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]); rp+=2; } bn_add_words(r,r,r,max); /* There will not be a carry */ bn_sqr_words(tmp,a,n); bn_add_words(r,r,tmp,max); } #ifdef BN_RECURSION /* r is 2*n words in size, * a and b are both n words in size. (There's not actually a 'b' here ...) * n must be a power of 2. * We multiply and return the result. * t must be 2*n words in size * We calculate * a[0]*b[0] * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) * a[1]*b[1] */ static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) { int n=n2/2; int zero,c1; BN_ULONG ln,lo,*p; if (n2 == 4) { #ifndef BN_SQR_COMBA bn_sqr_normal(r,a,4,t); #else bn_sqr_comba4(r,a); #endif return; } else if (n2 == 8) { #ifndef BN_SQR_COMBA bn_sqr_normal(r,a,8,t); #else bn_sqr_comba8(r,a); #endif return; } if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) { bn_sqr_normal(r,a,n2,t); return; } /* r=(a[0]-a[1])*(a[1]-a[0]) */ c1=bn_cmp_words(a,&(a[n]),n); zero=0; if (c1 > 0) bn_sub_words(t,a,&(a[n]),n); else if (c1 < 0) bn_sub_words(t,&(a[n]),a,n); else zero=1; /* The result will always be negative unless it is zero */ p= &(t[n2*2]); if (!zero) bn_sqr_recursive(&(t[n2]),t,n,p); else memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); bn_sqr_recursive(r,a,n,p); bn_sqr_recursive(&(r[n2]),&(a[n]),n,p); /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero * r[10] holds (a[0]*b[0]) * r[32] holds (b[1]*b[1]) */ 
c1=(int)(bn_add_words(t,r,&(r[n2]),n2)); /* t[32] is negative */ c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2)); /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1]) * r[10] holds (a[0]*a[0]) * r[32] holds (a[1]*a[1]) * c1 holds the carry bits */ c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2)); if (c1) { p= &(r[n+n2]); lo= *p; ln=(lo+c1)&BN_MASK2; *p=ln; /* The overflow will stop before we over write * words we should not overwrite */ if (ln < (BN_ULONG)c1) { do { p++; lo= *p; ln=(lo+1)&BN_MASK2; *p=ln; } while (ln == 0); } } } #endif /*BN_RECURSION */ /* unsigned subtraction of b from a, a must be larger than b. */ static int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max,min,dif; register BN_ULONG t1,t2,*ap,*bp,*rp; int i,carry; max = a->top; min = b->top; dif = max - min; if (dif < 0) /* hmm... should not be happening */ { return(0); } if (bn_wexpand(r,max) == NULL) return(0); ap=a->d; bp=b->d; rp=r->d; carry=0; for (i = min; i != 0; i--) { t1= *(ap++); t2= *(bp++); if (carry) { carry=(t1 <= t2); t1=(t1-t2-1)&BN_MASK2; } else { carry=(t1 < t2); t1=(t1-t2)&BN_MASK2; } *(rp++)=t1&BN_MASK2; } if (carry) /* subtracted */ { if (!dif) /* error: a < b */ return 0; while (dif) { dif--; t1 = *(ap++); t2 = (t1-1)&BN_MASK2; *(rp++) = t2; if (t1) break; } } if (rp != ap) { for (;;) { if (!dif--) break; rp[0]=ap[0]; if (!dif--) break; rp[1]=ap[1]; if (!dif--) break; rp[2]=ap[2]; if (!dif--) break; rp[3]=ap[3]; rp+=4; ap+=4; } } r->top=max; bn_correct_top(r); return(1); } static int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_POOL *ctx) { int retn=0; BIGNUM *n,*r; BN_ULONG *ap,*np,*rp,n0,v,*nrp; int al,nl,max,i,x,ri; if ((r = BN_POOL_get(ctx)) == NULL) goto err; if (!BN_copy(r,a)) goto err; n= &(mont->N); ap=a->d; /* mont->ri is the size of mont->N in bits (rounded up to the word size) */ al=ri=mont->ri/BN_BITS2; nl=n->top; if ((al == 0) || (nl == 0)) { r->top=0; return(1); } max=(nl+al+1); /* allow for overflow (no?) 
XXX */ if (bn_wexpand(r,max) == NULL) goto err; if (bn_wexpand(ret,max) == NULL) goto err; np=n->d; rp=r->d; nrp= &(r->d[nl]); /* clear the top words of T */ for (i=r->top; id[i]=0; r->top=max; n0=mont->n0; for (i=0; i= v) continue; else { if (((++nrp[0])&BN_MASK2) != 0) continue; if (((++nrp[1])&BN_MASK2) != 0) continue; for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; } } bn_correct_top(r); /* mont->ri will be a multiple of the word size */ x=ri; rp=ret->d; ap= &(r->d[x]); if (r->top < x) al=0; else al=r->top-x; ret->top=al; al-=4; for (i=0; iN)) >= 0) { if (!BN_usub(ret,ret,&(mont->N))) goto err; } retn=1; err: BN_POOL_release(ctx,1); return(retn); } /*SQR*/ /* r must not be a */ /* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */ static int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_POOL *ctx) { int release=0; int max,al; int ret = 0; BIGNUM *tmp,*rr; al=a->top; if (al <= 0) { r->top=0; return 1; } if(a != r) rr=r; else {rr=BN_POOL_get(ctx);release++;} tmp=BN_POOL_get(ctx); release++; if (!rr || !tmp) goto err; max = 2 * al; /* Non-zero (from above) */ if (bn_wexpand(rr,max) == NULL) goto err; if (al == 4) { #ifndef BN_SQR_COMBA BN_ULONG t[8]; bn_sqr_normal(rr->d,a->d,4,t); #else bn_sqr_comba4(rr->d,a->d); #endif } else if (al == 8) { #ifndef BN_SQR_COMBA BN_ULONG t[16]; bn_sqr_normal(rr->d,a->d,8,t); #else bn_sqr_comba8(rr->d,a->d); #endif } else { #if defined(BN_RECURSION) if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) { BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2]; bn_sqr_normal(rr->d,a->d,al,t); } else { int j,k; j=BN_num_bits_word((BN_ULONG)al); j=1<<(j-1); k=j+j; if (al == j) { if (bn_wexpand(tmp,k*2) == NULL) goto err; bn_sqr_recursive(rr->d,a->d,al,tmp->d); } else { if (bn_wexpand(tmp,max) == NULL) goto err; bn_sqr_normal(rr->d,a->d,al,tmp->d); } } #else if (bn_wexpand(tmp,max) == NULL) goto err; bn_sqr_normal(rr->d,a->d,al,tmp->d); #endif } /* If the most-significant half of the top word of 'a' is zero, then * the square of 'a' will max-1 
words. */ if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) rr->top = max - 1; else rr->top = max; if (rr != r) BN_copy(r,rr); ret = 1; err: BN_POOL_release(ctx,release); return(ret); } static int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_POOL *ctx) { BIGNUM *tmp; int ret=0; tmp = BN_POOL_get(ctx); if (tmp == NULL) goto err; if (0 == BN_ucmp(a,b)) { if (!BN_sqr(tmp,a,ctx)) goto err; } else { if (!BN_mul(tmp,a,b,ctx)) goto err; } /* reduce from aRR to aR */ if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; ret=1; err: BN_POOL_release(ctx,1); return(ret); } /* unsigned add of b to a */ static int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max,min,dif; BN_ULONG *ap,*bp,*rp,carry,t1,t2; const BIGNUM *tmp; if (a->top < b->top) { tmp=a; a=b; b=tmp; } max = a->top; min = b->top; dif = max - min; if (bn_wexpand(r,max+1) == NULL) return 0; r->top=max; ap=a->d; bp=b->d; rp=r->d; carry=bn_add_words(rp,ap,bp,min); rp+=min; ap+=min; bp+=min; if (carry) { while (dif) { dif--; t1 = *(ap++); t2 = (t1+1) & BN_MASK2; *(rp++) = t2; if (t2) { carry=0; break; } } if (carry) { /* carry != 0 => dif == 0 */ *rp = 1; r->top++; } } if (dif && rp != ap) while (dif--) /* copy remaining words if ap != rp */ *(rp++) = *(ap++); return 1; } static int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max; /* We are actually doing a - b :-) */ max=(a->top > b->top)?a->top:b->top; if (bn_wexpand(r,max) == NULL) return(0); if (BN_ucmp(a,b) < 0) { if (!BN_usub(r,b,a)) return(0); DHPRINT("----\n"); } else { if (!BN_usub(r,a,b)) return(0); } return(1); } static int BN_mul_word(BIGNUM *a, BN_ULONG w) { BN_ULONG ll; w&=BN_MASK2; if (a->top) { if (w == 0) BN_zero(a); else { ll=bn_mul_words(a->d,a->d,a->top,w); if (ll) { if (bn_wexpand(a,a->top+1) == NULL) return(0); a->d[a->top++]=ll; } } } return(1); } /* solves ax == 1 (mod n) */ static BIGNUM *BN_mod_inverse(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_POOL *ctx) { BIGNUM 
*A,*B,*X,*Y,*M,*D,*T,*R=NULL; BIGNUM *ret=NULL; int sign; A = BN_POOL_get(ctx); B = BN_POOL_get(ctx); X = BN_POOL_get(ctx); D = BN_POOL_get(ctx); M = BN_POOL_get(ctx); Y = BN_POOL_get(ctx); T = BN_POOL_get(ctx); if (T == NULL) goto err; if (in == NULL) R=BN_new(); else R=in; if (R == NULL) goto err; BN_one(X); BN_zero(Y); if (BN_copy(B,a) == NULL) goto err; if (BN_copy(A,n) == NULL) goto err; if (BN_ucmp(B, A) >= 0) { if (!(BN_mod(B, B, A, ctx))) goto err; } sign = -1; /* From B = a mod |n|, A = |n| it follows that * * 0 <= B < A, * -sign*X*a == B (mod |n|), * sign*Y*a == A (mod |n|). */ if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048))) { /* Binary inversion algorithm; requires odd modulus. * This is faster than the general algorithm if the modulus * is sufficiently small (about 400 .. 500 bits on 32-bit * sytems, but much more on 64-bit systems) */ int shift; while (!BN_is_zero(B)) { /* * 0 < B < |n|, * 0 < A <= |n|, * (1) -sign*X*a == B (mod |n|), * (2) sign*Y*a == A (mod |n|) */ /* Now divide B by the maximum possible power of two in the integers, * and divide X by the same value mod |n|. * When we're done, (1) still holds. */ shift = 0; while (!BN_is_bit_set(B, shift)) /* note that 0 < B */ { shift++; if (BN_is_odd(X)) { if (!BN_uadd(X, X, n)) goto err; } /* now X is even, so we can easily divide it by two */ if (!BN_rshift1(X, X)) goto err; } if (shift > 0) { if (!BN_rshift(B, B, shift)) goto err; } /* Same for A and Y. Afterwards, (2) still holds. */ shift = 0; while (!BN_is_bit_set(A, shift)) /* note that 0 < A */ { shift++; if (BN_is_odd(Y)) { if (!BN_uadd(Y, Y, n)) goto err; } /* now Y is even */ if (!BN_rshift1(Y, Y)) goto err; } if (shift > 0) { if (!BN_rshift(A, A, shift)) goto err; } /* We still have (1) and (2). * Both A and B are odd. * The following computations ensure that * * 0 <= B < |n|, * 0 < A < |n|, * (1) -sign*X*a == B (mod |n|), * (2) sign*Y*a == A (mod |n|), * * and that either A or B is even in the next iteration. 
*/ if (BN_ucmp(B, A) >= 0) { /* -sign*(X + Y)*a == B - A (mod |n|) */ if (!BN_uadd(X, X, Y)) goto err; /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that * actually makes the algorithm slower */ if (!BN_usub(B, B, A)) goto err; } else { /* sign*(X + Y)*a == A - B (mod |n|) */ if (!BN_uadd(Y, Y, X)) goto err; /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */ if (!BN_usub(A, A, B)) goto err; } } } else goto err; /* * The while loop (Euclid's algorithm) ends when * A == gcd(a,n); * we have * sign*Y*a == A (mod |n|), * where Y is non-negative. */ if (sign < 0) { if (!BN_sub(Y,n,Y)) goto err; } /* Now Y*a == A (mod |n|). */ if (BN_is_one(A)) { /* Y*a == 1 (mod |n|) */ if (BN_ucmp(Y,n) < 0) { if (!BN_copy(R,Y)) goto err; } else { if (!(BN_mod(R, Y, n, ctx))) goto err; } } else { goto err; } ret=R; err: if ((ret == NULL) && (in == NULL)) BN_free(R); BN_POOL_release(ctx,7); return(ret); } static int BN_sub_word(BIGNUM *a, BN_ULONG w) { int i; w &= BN_MASK2; /* degenerate case: w is zero */ if (!w) return 1; /* degenerate case: a is zero */ if(BN_is_zero(a)) { i = BN_set_word(a,w); if (i != 0) { DHPRINT("BN_set_negative\n"); /*BN_set_negative(a, 1); */ } return i; } if ((a->top == 1) && (a->d[0] < w)) { a->d[0]=w-a->d[0]; DHPRINT("--------\n"); return(1); } i=0; for (;;) { if (a->d[i] >= w) { a->d[i]-=w; break; } else { a->d[i]=(a->d[i]-w)&BN_MASK2; i++; w=1; } } if ((a->d[i] == 0) && (i == (a->top-1))) a->top--; return(1); } static int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_POOL *ctx) { int ret = 0; BIGNUM *Ri,*R; BIGNUM tmod; BN_ULONG buf[2]; if((Ri = BN_POOL_get(ctx)) == NULL) goto err; R= &(mont->RR); /* grab RR as a temp */ if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */ /*start*/ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; BN_zero(R); if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ buf[0]=mod->d[0]; /* tmod = N mod word size */ buf[1]=0; tmod.d=buf; tmod.top = buf[0] != 0 ? 
1 : 0; tmod.dmax=2; /* Ri = R^-1 mod N*/ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) goto err; if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */ if (!BN_is_zero(Ri)) { if (!BN_sub_word(Ri,1)) goto err; } else /* if N mod word size == 1 */ { if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */ } if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; /* Ni = (R*Ri-1)/N, * keep only least significant word: */ mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; /*end*/ /* setup RR for conversions */ BN_zero(&(mont->RR)); if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err; if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err; ret = 1; err: return ret; } static const EVP_MD sha1_md= { NID_sha1, SHA_DIGEST_LENGTH, 0, RAND_init, RAND_update, RAND_final, sizeof(EVP_MD *)+sizeof(OPEN_SHA_CTX), }; /* This call frees resources associated with the context */ static int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx) { /* Don't assume ctx->md_data was cleaned in EVP_Digest_Final, * because sometimes only copies of the context are ever finalised. 
*/ if (ctx->digest && ctx->digest->ctx_size && ctx->md_data) { os_free_mem(NULL, ctx->md_data); } memset(ctx,'\0',sizeof *ctx); return 1; } /* The caller can assume that this removes any secret data from the context */ static int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size) { int ret; ret=ctx->digest->final(ctx,md); if (size != NULL) *size=ctx->digest->md_size; memset(ctx->md_data,0,ctx->digest->ctx_size); return ret; } static void EVP_MD_CTX_init(EVP_MD_CTX *ctx) { memset(ctx,'\0',sizeof *ctx); } static int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type) { if (ctx->digest != type) { if (ctx->digest && ctx->digest->ctx_size) os_free_mem(NULL, ctx->md_data); ctx->digest=type; if (type->ctx_size) os_alloc_mem(NULL, (UCHAR **)&(ctx->md_data), type->ctx_size); } return ctx->digest->init(ctx); } static int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *data, size_t count) { return ctx->digest->update(ctx,data,count); } static void ssleay_rand_add(const void *buf, int num, long add) { int i,j,k,st_idx; long md_c[2]; unsigned char local_md[SHA_DIGEST_LENGTH]; EVP_MD_CTX m; /* * (Based on the rand(3) manpage) * * The input is chopped up into units of 20 bytes (or less for * the last block). Each of these blocks is run through the hash * function as follows: The data passed to the hash function * is the current 'md', the same number of bytes from the 'state' * (the location determined by in incremented looping index) as * the current 'block', the new key data 'block', and 'count' * (which is incremented after each use). * The result of this is kept in 'md' and also xored into the * 'state' at the same locations that were used as input into the * hash function. 
*/ st_idx=g_state_index; /* use our own copies of the counters so that even * if a concurrent thread seeds with exactly the * same data and uses the same subarray there's _some_ * difference */ md_c[0] = g_md_count[0]; md_c[1] = g_md_count[1]; memcpy(local_md, g_md, sizeof g_md); /* g_state_index <= g_state_num <= STATE_SIZE */ g_state_index += num; if (g_state_index >= STATE_SIZE) { g_state_index%=STATE_SIZE; g_state_num=STATE_SIZE; } else if (g_state_num < STATE_SIZE) { if (g_state_index > g_state_num) g_state_num=g_state_index; } /* g_state_index <= g_state_num <= STATE_SIZE */ /* g_state[st_idx], ..., g_state[(st_idx + num - 1) % STATE_SIZE] * are what we will use now, but other threads may use them * as well */ g_md_count[1] += (num / SHA_DIGEST_LENGTH) + (num % SHA_DIGEST_LENGTH > 0); EVP_MD_CTX_init(&m); for (i=0; i SHA_DIGEST_LENGTH)?SHA_DIGEST_LENGTH:j; EVP_DigestInit_ex(&m,&sha1_md); EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH); k=(st_idx+j)-STATE_SIZE; if (k > 0) { EVP_DigestUpdate(&m,&(g_state[st_idx]),j-k); EVP_DigestUpdate(&m,&(g_state[0]),k); } else EVP_DigestUpdate(&m,&(g_state[st_idx]),j); EVP_DigestUpdate(&m,buf,j); EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c)); EVP_DigestFinal_ex(&m,local_md,NULL); md_c[1]++; buf=(const char *)buf + j; for (k=0; k= STATE_SIZE) st_idx=0; } } EVP_MD_CTX_cleanup(&m); /* Don't just copy back local_md into md -- this could mean that * other thread's seeding remains without effect (except for * the incremented counter). By XORing it we keep at least as * much entropy as fits into md. 
*/ for (k = 0; k < (int)sizeof(g_md); k++) { g_md[k] ^= local_md[k]; } if (g_entropy < ENTROPY_NEEDED) /* stop counting when we have enough */ g_entropy += add; } static int ssleay_rand_bytes(unsigned char *buf, int num) { static volatile int stirred_pool = 0; int i,j,k,st_num,st_idx; int num_ceil; int ok; long md_c[2]; unsigned char local_md[SHA_DIGEST_LENGTH]; EVP_MD_CTX m; #ifndef GETPID_IS_MEANINGLESS pid_t curr_pid = getpid(); #endif int do_stir_pool = 0; if (num <= 0) return 1; EVP_MD_CTX_init(&m); /* round upwards to multiple of SHA_DIGEST_LENGTH/2 */ num_ceil = (1 + (num-1)/(SHA_DIGEST_LENGTH/2)) * (SHA_DIGEST_LENGTH/2); /* * (Based on the rand(3) manpage:) * * For each group of 10 bytes (or less), we do the following: * * Input into the hash function the local 'md' (which is RAND_initialized from * the global 'md' before any bytes are generated), the bytes that are to * be overwritten by the random bytes, and bytes from the 'state' * (incrementing looping index). From this digest output (which is kept * in 'md'), the top (up to) 10 bytes are returned to the caller and the * bottom 10 bytes are xored into the 'state'. * * Finally, after we have finished 'num' random bytes for the * caller, 'count' (which is incremented) and the local and global 'md' * are fed into the hash function and the results are kept in the * global 'md'. */ if (!RAND_initialized) { RAND_poll(); RAND_initialized = 1; } if (!stirred_pool) do_stir_pool = 1; ok = (g_entropy >= ENTROPY_NEEDED); if (!ok) { /* If the PRNG state is not yet unpredictable, then seeing * the PRNG output may help attackers to determine the new * state; thus we have to decrease the entropy estimate. * Once we've had enough initial seeding we don't bother to * adjust the entropy count, though, because we're not ambitious * to provide *information-theoretic* randomness. * * NOTE: This approach fails if the program forks before * we have enough entropy. 
Entropy should be collected * in a separate input pool and be transferred to the * output pool only when the entropy limit has been reached. */ g_entropy -= num; if (g_entropy < 0) g_entropy = 0; } if (do_stir_pool) { /* In the output function only half of 'md' remains secret, * so we better make sure that the required entropy gets * 'evenly distributed' through 'state', our randomness pool. * The input function (ssleay_rand_add) chains all of 'md', * which makes it more suitable for this purpose. */ int n = STATE_SIZE; /* so that the complete pool gets accessed */ while (n > 0) { /* Note that the seed does not matter, it's just that * ssleay_rand_add expects to have something to hash. */ /* at least SHA_DIGEST_LENGTH "...................." */ ssleay_rand_add("....................", SHA_DIGEST_LENGTH, 0); n -= SHA_DIGEST_LENGTH; } if (ok) stirred_pool = 1; } st_idx=g_state_index; st_num=g_state_num; md_c[0] = g_md_count[0]; md_c[1] = g_md_count[1]; memcpy(local_md, g_md, sizeof g_md); g_state_index+=num_ceil; if (g_state_index > g_state_num) g_state_index %= g_state_num; /* g_state[st_idx], ..., g_state[(st_idx + num_ceil - 1) % st_num] * are now ours (but other threads may use them too) */ g_md_count[0] += 1; while (num > 0) { /* num_ceil -= SHA_DIGEST_LENGTH/2 */ j=(num >= SHA_DIGEST_LENGTH/2)?SHA_DIGEST_LENGTH/2:num; num-=j; EVP_DigestInit_ex(&m,&sha1_md); #ifndef GETPID_IS_MEANINGLESS if (curr_pid) /* just in the first iteration to save time */ { EVP_DigestUpdate(&m,(unsigned char*)&curr_pid,sizeof curr_pid); curr_pid = 0; } #endif EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH); EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c)); #ifndef PURIFY EVP_DigestUpdate(&m,buf,j); /* purify complains */ #endif k=(st_idx+SHA_DIGEST_LENGTH/2)-st_num; if (k > 0) { EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2-k); EVP_DigestUpdate(&m,&(g_state[0]),k); } else EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2); 
EVP_DigestFinal_ex(&m,local_md,NULL);
		for (i=0; i= st_num) st_idx=0;
			if (i < j)
				*(buf++)=local_md[i+SHA_DIGEST_LENGTH/2];
			}
		}

	EVP_DigestInit_ex(&m,&sha1_md);
	EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
	EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
	EVP_DigestUpdate(&m,g_md,SHA_DIGEST_LENGTH);
	EVP_DigestFinal_ex(&m,g_md,NULL);
	EVP_MD_CTX_cleanup(&m);
	if (ok)
		return(1);
	else
		{
		return(0);
		}
	}

/*
 * RAND_poll - feed the initial seed material into the PRNG pool.
 *
 * Four buffers are handed to ssleay_rand_add(), in this order:
 *   1. the local array tmpbuf (ENTROPY_NEEDED bytes), with ENTROPY_NEEDED
 *      as the third argument;
 *   2. the constant 100 (placeholder for the commented-out pid);
 *   3. the constant 200 (placeholder for the commented-out uid);
 *   4. a clock reading: time(NULL) when USERSPACE is defined, otherwise
 *      the value stored by NdisGetSystemUpTime().
 * Calls 2-4 pass 0 as the third argument.
 *
 * NOTE(review): tmpbuf is never written before it is passed on, so its
 * contents are whatever the stack happens to hold, yet it is the only
 * input passed with a non-zero third argument (which appears to be the
 * entropy credit).  Every other input is a constant or a clock value.
 * Confirm that this seed is acceptable for generating DH private keys.
 *
 * Always returns 0.
 */
static int RAND_poll(void)
{
	unsigned long l=100;
	unsigned char tmpbuf[ENTROPY_NEEDED];
	int n = 0;

	/* n is set to a compile-time constant, so the test below only guards
	 * against ENTROPY_NEEDED being defined as zero or negative. */
	n=ENTROPY_NEEDED;
	if (n > 0)
	{
		ssleay_rand_add(tmpbuf,sizeof tmpbuf,(long)n);
	}

	/* put in some default random data, we need more than just this */
	l=100;/*curr_pid; */
	ssleay_rand_add(&l,sizeof(l),0);
	l=200;/*getuid(); */
	ssleay_rand_add(&l,sizeof(l),0);

#ifdef USERSPACE
	l=time(NULL);
#else
	NdisGetSystemUpTime(&l);
#endif /*USERSPACE */
	ssleay_rand_add(&l,sizeof(l),0);

	return 0;
}

static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
{
	unsigned char *buf=NULL;
	int ret=0,bit,bytes,mask;
	time_t tim;

	if (bits == 0)
	{
		BN_zero(rnd);
		return 1;
	}

	/* bytes: buffer size needed for 'bits' bits; bit: index of the top bit
	 * inside the first byte; mask: the bits of that byte above 'bit'. */
	bytes=(bits+7)/8;
	bit=(bits-1)%8;
	mask=0xff<<(bit+1);

	os_alloc_mem(NULL, (UCHAR **)&buf, bytes);
	if (buf == NULL)
	{
		goto err;
	}

	/* make a random number and set the top and bottom bits */
#ifdef USERSPACE
	time(&tim);
#else
	NdisGetSystemUpTime((ULONG *)(&tim));
#endif /*USERSPACE */
	ssleay_rand_add(&tim,sizeof(tim),0);

	/* In pseudorand mode only a -1 result is treated as failure; otherwise
	 * any result <= 0 aborts. */
	if (pseudorand)
	{
		if (ssleay_rand_bytes(buf, bytes) == -1)
			goto err;
	}
	else
	{
		if (ssleay_rand_bytes(buf, bytes) <= 0)
			goto err;
	}

	if (pseudorand == 2)
	{
		/* generate patterns that are more likely to trigger BN library bugs */
		int i;
		unsigned char c;

		for (i = 0; i < bytes; i++)
		{
			ssleay_rand_bytes(&c, 1);
			if (c >= 128 && i > 0)
				buf[i] = buf[i-1];
			else if (c < 42)
				buf[i] = 0;
			else if (c < 84)
				buf[i] = 255;
		}
	}

	if (top != -1)
	{
		if (top)
		{
			if (bit == 0)
			{
				buf[0]=1;
				buf[1]|=0x80;
			}
			else
			{
				buf[0]|=(3<<(bit-1));
			}
		}
		else
		{
			buf[0]|=(1<ri=0;
	BN_init(&(ret->RR));
	BN_init(&(ret->N));
ret->flags=BN_FLG_MALLOCED;
	return(ret);
}

/*
 * BN_CTX_free - release a BN_POOL.
 *
 * Does nothing for a NULL pointer.  Otherwise calls BN_POOL_finish() on the
 * pool and then frees the pool structure itself.
 */
static void BN_CTX_free(BN_POOL *ctx)
{
	if (ctx == NULL)
		return;
	BN_POOL_finish(ctx);
	os_free_mem(NULL, ctx);
}

/*
 * BN_MONT_CTX_free - release a Montgomery context.
 *
 * Does nothing for a NULL pointer.  The embedded RR and N numbers are always
 * freed; the structure itself is freed only when its flags carry
 * BN_FLG_MALLOCED.
 */
static void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
	if(mont == NULL)
		return;

	BN_free(&(mont->RR));
	BN_free(&(mont->N));
	if (mont->flags & BN_FLG_MALLOCED)
		os_free_mem(NULL, mont);
}

/*
 * BN_MOD_MUL_WORD(r, w, m): multiply r by the word w, reduce the product
 * modulo m into t, then swap r and t so that r names the reduced value.
 * Evaluates to non-zero on success.  The expansion uses the identifiers t,
 * swap_tmp and ctx, which must be in scope at the point of use.
 */
#define BN_MOD_MUL_WORD(r, w, m) (BN_mul_word(r, (w)) && ((BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))

/*
 * BN_mod_exp_mont_word - modular exponentiation rr = a^p mod m for a base
 * 'a' that fits in one BN_ULONG.
 *
 * The running result is kept as the product r*w: w is a plain machine word
 * and r is a BIGNUM multiplied by the Montgomery factor.  Squarings and
 * multiplications by 'a' are done on w alone for as long as they do not
 * overflow; on overflow w is folded into r and restarted.
 *
 * rr       receives the result
 * a        base; reduced modulo m->d[0] when m is a single word
 * p        exponent; must not carry BN_FLG_EXP_CONSTTIME
 * m        modulus; must be odd
 * ctx      pool supplying three temporaries (released at the err label)
 * in_mont  optional Montgomery context for m; when NULL a temporary one is
 *          created here and freed before returning
 *
 * Returns 1 on success, 0 on failure or when m is even, and -1 when p
 * carries BN_FLG_EXP_CONSTTIME.  Callers that test the result with '!'
 * will not notice the -1 case.
 */
static int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
{
	BN_MONT_CTX *mont = NULL;
	int b, bits, ret=0;
	int r_is_one;
	BN_ULONG w, next_w;
	BIGNUM *d, *r, *t;
	BIGNUM *swap_tmp;

	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
	{
		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
		return -1;
	}

	if (!BN_is_odd(m))
	{
		return(0);
	}
	if (m->top == 1)
		a %= m->d[0]; /* make sure that 'a' is reduced */

	/* The early returns below happen before anything is taken from the
	 * pool, so they bypass the err label on purpose. */
	bits = BN_num_bits(p);
	if (bits == 0)
	{
		ret = BN_one(rr);
		return ret;
	}
	if (a == 0)
	{
		BN_zero(rr);
		ret = 1;
		return ret;
	}

	/* d is taken from the pool and NULL-checked, but it is not referenced
	 * again in this function. */
	d = BN_POOL_get(ctx);
	r = BN_POOL_get(ctx);
	t = BN_POOL_get(ctx);
	if (d == NULL || r == NULL || t == NULL) goto err;

	if (in_mont != NULL)
		mont=in_mont;
	else
	{
		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
	}

	r_is_one = 1; /* except for Montgomery factor */

	/* bits-1 >= 0 */

	/* The result is accumulated in the product r*w. */
	w = a; /* bit 'bits-1' of 'p' is always set */
	for (b = bits-2; b >= 0; b--)
	{
		/* First, square r*w. */
		next_w = w*w;
		if ((next_w/w) != w) /* overflow */
		{
			if (r_is_one)
			{
				if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
				r_is_one = 0;
			}
			else
			{
				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
			}
			next_w = 1;
		}
		w = next_w;
		if (!r_is_one)
		{
			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
		}

		/* Second, multiply r*w by 'a' if exponent bit is set.
*/
		if (BN_is_bit_set(p, b))
		{
			next_w = w*a;
			if ((next_w/a) != w) /* overflow */
			{
				/* w*a does not fit in a word: fold the current w into
				 * r and restart the word accumulator at 'a'. */
				if (r_is_one)
				{
					if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
					r_is_one = 0;
				}
				else
				{
					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
				}
				next_w = a;
			}
			w = next_w;
		}
	}

	/* Finally, set r:=r*w. */
	if (w != 1)
	{
		if (r_is_one)
		{
			if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
			r_is_one = 0;
		}
		else
		{
			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
		}
	}

	if (r_is_one) /* can happen only if a == 1*/
	{
		if (!BN_one(rr)) goto err;
	}
	else
	{
		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
	}
	ret = 1;
err:
	/* Free the Montgomery context only when it was created here; then
	 * hand the three temporaries back to the pool. */
	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
	BN_POOL_release(ctx,3);
	return(ret);
}

/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
 * so that accessing any of these table values shows the same access pattern as far
 * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
 * from/to that table. */

/*
 * MOD_EXP_CTIME_COPY_TO_PREBUF - scatter a BIGNUM into the interleaved table.
 *
 * b is grown to hold 'top' words and zero-padded up to that length; then
 * byte i of b->d is written to buf[idx + i*width] for every byte of the
 * 'top' words.  b's top is re-normalised afterwards.
 *
 * Returns 1 on success, 0 when b cannot be expanded.
 */
static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
{
	size_t i, j;

	if (bn_wexpand(b, top) == NULL)
		return 0;
	while (b->top < top)
	{
		b->d[b->top++] = 0;
	}

	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
	{
		buf[j] = ((unsigned char*)b->d)[i];
	}

	bn_correct_top(b);
	return 1;
}

/*
 * MOD_EXP_CTIME_COPY_FROM_PREBUF - gather a BIGNUM back out of the table.
 *
 * Inverse of MOD_EXP_CTIME_COPY_TO_PREBUF: byte i of b->d is read from
 * buf[idx + i*width], b->top is set to 'top' and then normalised.
 *
 * Returns 1 on success, 0 when b cannot be expanded.
 */
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
{
	size_t i, j;

	if (bn_wexpand(b, top) == NULL)
		return 0;

	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
	{
		((unsigned char*)b->d)[i] = buf[j];
	}

	b->top = top;
	bn_correct_top(b);
	return 1;
}

/* This variant of BN_mod_exp_mont() uses fixed windows and the special
 * precomputation memory layout to limit data-dependency to a minimum
 * to protect secret exponents (cf.
the hyper-threading timing attacks * pointed out by Colin Percival, * http://www.daemonology.net/hyperthreading-considered-harmful */ static int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont) { int i,bits,ret=0,idx,window,wvalue; int top; BIGNUM *r; const BIGNUM *aa; BN_MONT_CTX *mont=NULL; int numPowers; unsigned char *powerbufFree=NULL; int powerbufLen = 0; unsigned char *powerbuf=NULL; BIGNUM *computeTemp=NULL, *am=NULL; top = m->top; if (!(m->d[0] & 1)) { return(0); } bits=BN_num_bits(p); if (bits == 0) { ret = BN_one(rr); return ret; } r = BN_POOL_get(ctx); if (r == NULL) goto err; /* Allocate a montgomery context if it was not supplied by the caller. * If this is not done, things will break in the montgomery part. */ if (in_mont != NULL) mont=in_mont; else { if ((mont=BN_MONT_CTX_new()) == NULL) goto err; if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; } /* Get the window size to use with size of p. */ window = BN_window_bits_for_ctime_exponent_size(bits); /* Allocate a buffer large enough to hold all of the pre-computed * powers of a. */ numPowers = 1 << window; powerbufLen = sizeof(m->d[0])*top*numPowers; os_alloc_mem(NULL, (UCHAR **)&powerbufFree, powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); if (powerbufFree == NULL) goto err; powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); memset(powerbuf, 0, powerbufLen); /* Initialize the intermediate result. Do this early to save double conversion, * once each for a^0 and intermediate result. 
*/ if (!BN_mod_mul_montgomery((r),(BN_value_one()),&((mont)->RR),(mont),(ctx))) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err; /* Initialize computeTemp as a^1 with montgomery precalcs */ computeTemp = BN_POOL_get(ctx); am = BN_POOL_get(ctx); if (computeTemp==NULL || am==NULL) goto err; if (BN_ucmp(a,m) >= 0) { if (!BN_mod(am,a,m,ctx)) goto err; aa= am; } else aa=a; if (!BN_mod_mul_montgomery((am),(aa),&((mont)->RR),(mont),(ctx))) goto err; if (!BN_copy(computeTemp, am)) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err; /* If the window size is greater than 1, then calculate * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) * (even powers could instead be computed as (a^(i/2))^2 * to use the slight performance advantage of sqr over mul). */ if (window > 1) { for (i=2; i= 0) { wvalue=0; /* The 'value' of the window */ /* Scan the window, squaring the result as we go */ for (i=0; i= 0) { if (!(BN_mod(val[0], a, m, ctx))) goto err; aa= val[0]; } else aa=a; if (BN_is_zero(aa)) { BN_zero(rr); ret = 1; goto err; } if (!BN_mod_mul_montgomery((val[0]),(aa),&((mont)->RR),(mont),(ctx))) goto err; /* 1 */ window = BN_window_bits_for_exponent_size(bits); if (window > 1) { if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ j=1<<(window-1); for (i=1; iRR),(mont),(ctx))) goto err; for (;;) { if (BN_is_bit_set(p,wstart) == 0) { if (!start) { if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err; } if (wstart == 0) break; wstart--; continue; } /* We now have wstart on a 'set' bit, we now need to work out * how bit a window to do. 
To do this we need to scan
		 * forward until the last set bit before the end of the
		 * window */
		j=wstart;
		wvalue=1;
		wend=0;
		for (i=1; i>1],mont,ctx))
			goto err;

		/* move the 'window' down further */
		wstart-=wend+1;
		wvalue=0;
		start=0;
		if (wstart < 0) break;
	}
	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
	ret=1;
err:
	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
	BN_POOL_release(ctx,release);
	return(ret);
}

/*KERNEL MODULE TRACE*/

/*
 * Byte string of the Diffie-Hellman prime modulus P (DH_KEY_LEN bytes),
 * converted with BN_bin2bn() in GenerateDHPublicKey().
 * NOTE(review): the value looks like the 1536-bit MODP group prime of
 * RFC 3526 -- confirm against the specification before relying on that.
 */
static unsigned char DH_P_VALUE[DH_KEY_LEN] =
{
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
	0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
	0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
	0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
	0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
	0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
	0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
	0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
	0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
	0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
	0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
	0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
	0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
	0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
	0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
	0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
	0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
	0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
	0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
	0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
	0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
	0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};

/*
 * DH_new_method - allocate an empty DH object.
 *
 * All four BIGNUM pointers (p, g, pub_key, priv_key) start out NULL and
 * flags starts at 0; the caller fills them in.  Returns NULL when the
 * allocation yields no memory.  Release the object with DH_free().
 */
static DH *DH_new_method(void)
{
	DH *ret;

	/* ret is not initialised here: the NULL test relies on
	 * os_alloc_mem() storing NULL on failure -- TODO confirm. */
	os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(DH));
	if (ret == NULL)
	{
		return(NULL);
	}

	ret->p=NULL;
	ret->g=NULL;
	ret->pub_key=NULL;
	ret->priv_key=NULL;
	ret->flags=0;
	return(ret);
}

/*
 * dh_bn_mod_exp - compute r = a^p mod m, choosing the exponentiation
 * routine from the base size and the DH object's flags.
 */
static int dh_bn_mod_exp(const DH *dh, BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX
*m_ctx)
{
	/* If a is only one word long and constant time is false, use the faster
	 * exponentiation function.  That function returns -1 (not 0) when the
	 * exponent carries BN_FLG_EXP_CONSTTIME, so callers must treat any
	 * result <= 0 as failure.
	 */
	if (a->top == 1 && ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) != 0)) {
		BN_ULONG A = a->d[0];

		return BN_mod_exp_mont_word(r,A,p,m,ctx,m_ctx);
	} else
		return BN_mod_exp_mont(r,a,p,m,ctx,m_ctx);
}

/*
 * generate_key - fill in dh->pub_key (and dh->priv_key when absent).
 *
 * dh->p and dh->g must already be set.  When dh->priv_key is NULL a new
 * private exponent of BN_num_bits(p)-1 bits is drawn with bnrand(); an
 * existing private key is reused as is.  The public key is then computed
 * as g^priv_key mod p, into the existing dh->pub_key object if there is
 * one, otherwise into a newly allocated BIGNUM.
 *
 * Returns 1 on success and 0 on failure.  On failure dh is left unchanged
 * and any BIGNUM allocated here is released.
 */
static int generate_key(DH *dh)
{
	int ok = 0;
	int generate_new_key = 0;
	unsigned lvar;
	BN_POOL *ctx = NULL;
	BN_MONT_CTX *mont = NULL;
	BIGNUM *pub_key = NULL, *priv_key = NULL;

	/* Nothing has been acquired yet, so a plain return is enough. */
	if (dh == NULL || dh->p == NULL || dh->g == NULL)
		return 0;

	ctx = BN_CTX_new();
	if (ctx == NULL)
		goto err;

	if (dh->priv_key == NULL) {
		priv_key = BN_new();
		if (priv_key == NULL)
			goto err;
		generate_new_key = 1;
	} else
		priv_key = dh->priv_key;

	if (dh->pub_key == NULL) {
		pub_key = BN_new();
		if (pub_key == NULL)
			goto err;
	} else
		pub_key = dh->pub_key;

	if (generate_new_key) {
		lvar = BN_num_bits(dh->p) - 1; /* secret exponent length */
		if (!bnrand(0, priv_key, lvar, 0, 0))
			goto err;
	}

	{
		BIGNUM local_prk;
		BIGNUM *prk;

		if ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) == 0) {
			/* Temporary alias of priv_key that carries the
			 * constant-time flag; it shares priv_key's data. */
			BN_init(&local_prk);
			prk = &local_prk;
			BN_with_flags(prk, priv_key, BN_FLG_EXP_CONSTTIME);
		} else
			prk = priv_key;

		/* <= 0 rather than '!': the single-word path can return -1. */
		if (dh_bn_mod_exp(dh, pub_key, dh->g, prk, dh->p, ctx, mont) <= 0)
			goto err;
	}

	dh->pub_key = pub_key;
	dh->priv_key = priv_key;
	ok = 1;
err:
	/* After success dh owns both numbers, so neither test fires.  After a
	 * failure only objects allocated in this call are released; the
	 * private key is wiped before it is freed. */
	if ((pub_key != NULL) && (dh->pub_key == NULL))
		BN_free(pub_key);
	if ((priv_key != NULL) && (dh->priv_key == NULL))
		BN_clear_free(priv_key);
	BN_CTX_free(ctx);
	return(ok);
}

/*
 * DH_free - release a DH object and every BIGNUM it owns.
 *
 * Accepts NULL.  Each member is released with BN_clear_free().
 */
static void DH_free(DH *r)
{
	if(r == NULL)
		return;

	if (r->p != NULL)
		BN_clear_free(r->p);
	if (r->g != NULL)
		BN_clear_free(r->g);
	if (r->pub_key != NULL)
		BN_clear_free(r->pub_key);
	if (r->priv_key != NULL)
		BN_clear_free(r->priv_key);
	os_free_mem(NULL, r);
}

/*
 * compute_key - derive the shared secret pub_key^priv_key mod p.
 *
 * key      output buffer for the secret bytes (caller-sized)
 * pub_key  the peer's public value
 * dh       local DH object; needs p and priv_key
 *
 * Returns the value of BN_bn2bin() for the secret on success, -1 on any
 * failure (modulus too large, allocation failure, missing private key,
 * exponentiation failure).
 */
static int compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh)
{
	BN_POOL *ctx = NULL;
	BN_MONT_CTX *mont = NULL;
	BIGNUM *tmp;
	int ret = -1;

	if (BN_num_bits(dh->p) > OPENSSL_DH_MAX_MODULUS_BITS)
		goto err;

	ctx = BN_CTX_new();
	if (ctx == NULL)
		goto err;

	/* The pool can run dry; do not hand a NULL result to the exponentiation. */
	tmp = BN_POOL_get(ctx);
	if (tmp == NULL)
		goto err;

	if (dh->priv_key == NULL)
		goto err;

	if (!dh_bn_mod_exp(dh, tmp, pub_key,
dh->priv_key,dh->p,ctx,mont)) goto err; ret=BN_bn2bin(tmp,key); err: BN_CTX_free(ctx); return(ret); } static DH *DHSecret=NULL; static DH *DHSecretR=NULL; void GenerateDHPublicKey(unsigned char *ran_buf, int ran_len, unsigned char *dhkey, int *dhkey_len) { unsigned int DH_G_VALUE = 2; unsigned int g = htonl(DH_G_VALUE); *dhkey_len = 0; if(DHSecret == NULL) { DHSecret = DH_new_method(); if(NULL == DHSecret) goto err; DHSecret->p = BN_new(); DHSecret->g = BN_new(); } else if((DHSecret != NULL) && (DHSecretR == NULL)) { DH_free(DHSecret); DHSecret = NULL; DHSecret = DH_new_method(); if(NULL == DHSecret) goto err; DHSecret->p = BN_new(); DHSecret->g = BN_new(); } else if((DHSecret != NULL) && (DHSecretR != NULL)) { DHPRINT("reuse DHSecret\n"); } BN_bin2bn(DH_P_VALUE, DH_KEY_LEN, DHSecret->p); BN_bin2bn((const unsigned char *)&g, 4, DHSecret->g); /* if (!DH_check(DHSecret, &i)) goto err; if (i & DH_CHECK_P_NOT_PRIME) DHPRINT( "p value is not prime\n"); if (i & DH_CHECK_P_NOT_SAFE_PRIME) DHPRINT( "p value is not a safe prime\n"); if (i & DH_UNABLE_TO_CHECK_GENERATOR) DHPRINT( "unable to check the generator value\n"); if (i & DH_NOT_SUITABLE_GENERATOR) DHPRINT( "the g value is not a generator\n"); */ /*DHSecret->flags &= ~DH_FLAG_NO_EXP_CONSTTIME; */ DHSecret->flags |= DH_FLAG_NO_EXP_CONSTTIME; if (!generate_key(DHSecret)) goto err; BN_bn2bin(DHSecret->pub_key,dhkey); *dhkey_len = BN_num_bytes(DHSecret->pub_key); DHPRINT("GenerateDHPublicKey Success dhkey_len%d\n",*dhkey_len); /* DHSecret will use and free on GenerateDHSecreteKey. 
NO free it */ return; err: DH_free(DHSecret); DHSecret = NULL; DHPRINT("GenerateDHPublicKey EXIT\n"); return; } /******* KDK generation ****** */ /*generate the DH shared secret */ void GenerateDHSecreteKey(unsigned char *ran_buf, int ran_len, unsigned char *peer_dhkey, int peer_dhkey_len, unsigned char *secrete_dhkey, int *secrete_dhkey_len) { BIGNUM DH_PubKey_Peer; *secrete_dhkey_len = 0; if(NULL == secrete_dhkey) goto err; if(NULL == DHSecret) goto err; DH_PubKey_Peer.flags=BN_FLG_MALLOCED; DH_PubKey_Peer.top=0; DH_PubKey_Peer.dmax=0; DH_PubKey_Peer.d=NULL; BN_bin2bn(peer_dhkey, peer_dhkey_len, &DH_PubKey_Peer); *secrete_dhkey_len = compute_key(secrete_dhkey,&DH_PubKey_Peer,DHSecret); if (DH_PubKey_Peer.d != NULL) os_free_mem(NULL, DH_PubKey_Peer.d); DHPRINT("GenerateDHSecreteKey Success\n"); err: DH_free(DHSecret); DHSecret = NULL; DHPRINT("GenerateDHSecreteKey EXIT\n"); return; } void DH_freeall(void) { DH_free(DHSecret); DHSecret = NULL; return; } #else /* CRYPT_GPL_ALGORITHM */ /* ======================================================================== Routine Description: Diffie-Hellman public key generation Arguments: GValue Array in UINT8 GValueLength The length of G in bytes PValue Array in UINT8 PValueLength The length of P in bytes PrivateKey Private key PrivateKeyLength The length of Private key in bytes Return Value: PublicKey Public key PublicKeyLength The length of public key in bytes Note: Reference to RFC2631 PublicKey = G^PrivateKey (mod P) ======================================================================== */ void DH_PublicKey_Generate ( IN UINT8 GValue[], IN UINT GValueLength, IN UINT8 PValue[], IN UINT PValueLength, IN UINT8 PrivateKey[], IN UINT PrivateKeyLength, OUT UINT8 PublicKey[], INOUT UINT *PublicKeyLength) { PBIG_INTEGER pBI_G = NULL; PBIG_INTEGER pBI_P = NULL; PBIG_INTEGER pBI_PrivateKey = NULL; PBIG_INTEGER pBI_PublicKey = NULL; /* * 1. 
Check the input parameters * - GValueLength, PValueLength and PrivateLength must be large than zero * - PublicKeyLength must be large or equal than PValueLength * - PValue must be odd * * - PValue must be prime number (no implement) * - GValue must be greater than 0 but less than the PValue (no implement) */ if (GValueLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: G length is (%d)\n", GValueLength)); return; } /* End of if */ if (PValueLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P length is (%d)\n", PValueLength)); return; } /* End of if */ if (PrivateKeyLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: private key length is (%d)\n", PrivateKeyLength)); return; } /* End of if */ if (*PublicKeyLength < PValueLength) { DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: public key length(%d) must be large or equal than P length(%d)\n", *PublicKeyLength, PValueLength)); return; } /* End of if */ if (!(PValue[PValueLength - 1] & 0x1)) { DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P value must be odd\n")); return; } /* End of if */ /* * 2. Transfer parameters to BigInteger structure */ BigInteger_Init(&pBI_G); BigInteger_Init(&pBI_P); BigInteger_Init(&pBI_PrivateKey); BigInteger_Init(&pBI_PublicKey); BigInteger_Bin2BI(GValue, GValueLength, &pBI_G); BigInteger_Bin2BI(PValue, PValueLength, &pBI_P); BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey); /* * 3. Calculate PublicKey = G^PrivateKey (mod P) * - BigInteger Operation * - Montgomery reduction */ BigInteger_Montgomery_ExpMod(pBI_G, pBI_PrivateKey, pBI_P, &pBI_PublicKey); /* * 4. 
Transfer BigInteger structure to char array
     */
    BigInteger_BI2Bin(pBI_PublicKey, PublicKey, PublicKeyLength);

    /* Release all four temporaries created in step 2. */
    BigInteger_Free(&pBI_G);
    BigInteger_Free(&pBI_P);
    BigInteger_Free(&pBI_PrivateKey);
    BigInteger_Free(&pBI_PublicKey);
} /* End of DH_PublicKey_Generate */

/*
========================================================================
Routine Description:
    Diffie-Hellman secret key generation

Arguments:
    PublicKey        Public key (the value raised to PrivateKey, see Note)
    PublicKeyLength  The length of Public key in bytes
    PValue           Array in UINT8
    PValueLength     The length of P in bytes
    PrivateKey       Private key
    PrivateKeyLength The length of Private key in bytes

Return Value:
    SecretKey        Secret key
    SecretKeyLength  The length of secret key in bytes.  On entry it must
                     hold at least PValueLength (checked); it is then passed
                     to BigInteger_BI2Bin, which presumably stores the
                     number of bytes written -- confirm against that helper.

Note:
    Reference to RFC2631
    SecretKey = PublicKey^PrivateKey (mod P)
    When a parameter check fails the routine logs the reason and returns
    without writing SecretKey or *SecretKeyLength; the function is void, so
    the caller gets no other failure indication.
========================================================================
*/
void DH_SecretKey_Generate (
    IN UINT8 PublicKey[],
    IN UINT PublicKeyLength,
    IN UINT8 PValue[],
    IN UINT PValueLength,
    IN UINT8 PrivateKey[],
    IN UINT PrivateKeyLength,
    OUT UINT8 SecretKey[],
    INOUT UINT *SecretKeyLength)
{
    /* All four start as NULL and are set up by BigInteger_Init in step 2. */
    PBIG_INTEGER pBI_P = NULL;
    PBIG_INTEGER pBI_SecretKey = NULL;
    PBIG_INTEGER pBI_PrivateKey = NULL;
    PBIG_INTEGER pBI_PublicKey = NULL;

    /*
     * 1.
Check the input parameters * - PublicKeyLength, PValueLength and PrivateLength must be large than zero * - SecretKeyLength must be large or equal than PValueLength * - PValue must be odd * * - PValue must be prime number (no implement) */ if (PublicKeyLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: public key length is (%d)\n", PublicKeyLength)); return; } /* End of if */ if (PValueLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P length is (%d)\n", PValueLength)); return; } /* End of if */ if (PrivateKeyLength == 0) { DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: private key length is (%d)\n", PrivateKeyLength)); return; } /* End of if */ if (*SecretKeyLength < PValueLength) { DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: secret key length(%d) must be large or equal than P length(%d)\n", *SecretKeyLength, PValueLength)); return; } /* End of if */ if (!(PValue[PValueLength - 1] & 0x1)) { DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P value must be odd\n")); return; } /* End of if */ /* * 2. Transfer parameters to BigInteger structure */ BigInteger_Init(&pBI_P); BigInteger_Init(&pBI_PrivateKey); BigInteger_Init(&pBI_PublicKey); BigInteger_Init(&pBI_SecretKey); BigInteger_Bin2BI(PublicKey, PublicKeyLength, &pBI_PublicKey); BigInteger_Bin2BI(PValue, PValueLength, &pBI_P); BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey); /* * 3. Calculate SecretKey = PublicKey^PrivateKey (mod P) * - BigInteger Operation * - Montgomery reduction */ BigInteger_Montgomery_ExpMod(pBI_PublicKey, pBI_PrivateKey, pBI_P, &pBI_SecretKey); /* * 4. Transfer BigInteger structure to char array */ BigInteger_BI2Bin(pBI_SecretKey, SecretKey, SecretKeyLength); BigInteger_Free(&pBI_P); BigInteger_Free(&pBI_PrivateKey); BigInteger_Free(&pBI_PublicKey); BigInteger_Free(&pBI_SecretKey); } /* End of DH_SecretKey_Generate */ #endif /* CRYPT_GPL_ALGORITHM */