Files
Linux_Drivers/osdrv/extdrv/wireless/mediatek/mt7603/common/crypt_dh.c
forum_service 213c880673 add driver of tp、wiegand-gpio and wireless
Change-Id: Ie3c11d9d85cf1a05042f5690ac711856fe8b1ad7
2023-12-22 09:56:05 +08:00

4761 lines
105 KiB
C

/****************************************************************************
* Ralink Tech Inc.
* Taiwan, R.O.C.
*
* (c) Copyright 2002, Ralink Technology, Inc.
*
* All rights reserved. Ralink's source code is an unpublished work and the
* use of a copyright notice does not imply otherwise. This source code
* contains confidential trade secret material of Ralink Tech. Any attemp
* or participation in deciphering, decoding, reverse engineering or in any
* way altering the source code is stricitly prohibited, unless the prior
* written consent of Ralink Technology, Inc. is obtained.
***************************************************************************/
#ifndef CRYPT_GPL_ALGORITHM
/****************************************************************************
Module Name:
DH
Abstract:
RFC 2631: Diffie-Hellman Key Agreement Method
Revision History:
Who When What
-------- ---------- ------------------------------------------
Eddy 2009/01/21 Create Diffie-Hellman
***************************************************************************/
#endif /* CRYPT_GPL_ALGORITHM */
#include "crypt_dh.h"
#include "crypt_biginteger.h"
#ifdef CRYPT_GPL_ALGORITHM
/*dh_key.h START*/
#define GETPID_IS_MEANINGLESS
#ifndef DHPRINT
#define DHPRINT(fmt, args...) DBGPRINT(RT_DEBUG_OFF, (fmt, ## args))
#endif
typedef struct bignum_st BIGNUM;
typedef struct bn_mont_ctx_st BN_MONT_CTX;
typedef struct env_md_st EVP_MD;
typedef struct env_md_ctx_st EVP_MD_CTX;
typedef struct dh_st DH;
#define BN_MUL_COMBA
#define BN_SQR_COMBA
#define BN_ULLONG unsigned long
#define BN_ULONG unsigned int
#define BN_BITS 64
#define BN_BYTES 4
#define BN_BITS2 32
#define BN_BITS4 16
#define BN_MASK (0xffffffffffffffffLL)
#define BN_MASK2 (0xffffffffL)
#define BN_MASK2l (0xffff)
#define BN_MASK2h1 (0xffff8000L)
#define BN_MASK2h (0xffff0000L)
#define BN_TBIT (0x80000000L)
#define BN_FLG_MALLOCED 0x01
#define BN_FLG_STATIC_DATA 0x02
#define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings
* (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
#define BN_set_flags(b,n) ((b)->flags|=(n))
#define BN_get_flags(b,n) ((b)->flags&(n))
/* get a clone of a BIGNUM with changed flags, for *temporary* use only
* (the two BIGNUMs cannot not be used in parallel!) */
#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \
(dest)->top=(b)->top, \
(dest)->dmax=(b)->dmax, \
(dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \
| ((b)->flags & ~BN_FLG_MALLOCED) \
| BN_FLG_STATIC_DATA \
| (n)))
struct bignum_st
{
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
int top; /* Index of last used d +1. */
/* The next are internal book keeping for bn_expand. */
int dmax; /* Size of the d array. */
int flags;
};
/* Used for montgomery multiplication */
struct bn_mont_ctx_st
{
int ri; /* number of bits in R */
BIGNUM RR; /* used to convert to montgomery form */
BIGNUM N; /* The modulus */
BN_ULONG n0; /* least significant word of Ni */
int flags;
};
#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
#define BN_is_zero(a) ((a)->top == 0)
#define BN_is_one(a) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(1)))
#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
#define BN_one(a) (BN_set_word((a),1))
#define BN_zero(a) (BN_set_word((a),0))
#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
#define bn_correct_top(a) \
{ \
BN_ULONG *ftl; \
if ((a)->top > 0) \
{ \
for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
if (*(ftl--)) break; \
} \
}
/*
* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
*
*
* For window size 'w' (w >= 2) and a random 'b' bits exponent,
* the number of multiplications is a constant plus on average
*
* 2^(w-1) + (b-w)/(w+1);
*
* here 2^(w-1) is for precomputing the table (we actually need
* entries only for windows that have the lowest bit set), and
* (b-w)/(w+1) is an approximation for the expected number of
* w-bit windows, not counting the first one.
*
* Thus we should use
*
* w >= 6 if b > 671
* w = 5 if 671 > b > 239
* w = 4 if 239 > b > 79
* w = 3 if 79 > b > 23
* w <= 2 if 23 > b
*
* (with draws in between). Very small exponents are often selected
* with low Hamming weight, so we use w = 1 for b <= 23.
*/
#define BN_window_bits_for_exponent_size(b) \
((b) > 671 ? 6 : \
(b) > 239 ? 5 : \
(b) > 79 ? 4 : \
(b) > 23 ? 3 : 1)
/* BN_mod_exp_mont_conttime is based on the assumption that the
* L1 data cache line width of the target processor is at least
* the following value.
*/
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
/* Window sizes optimized for fixed window size modular exponentiation
* algorithm (BN_mod_exp_mont_consttime).
*
* To achieve the security goals of BN_mode_exp_mont_consttime, the
* maximum size of the window must not exceed
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
*
* Window size thresholds are defined for cache line sizes of 32 and 64,
* cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
* window size of 7 should only be used on processors that have a 128
* byte or greater cache line size.
*/
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 937 ? 6 : \
(b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
#endif
#define BN_MULL_SIZE_NORMAL (16) /* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
/*************************************************************
* Using the long long type
*/
#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
/*************************************************************
* No long long type
*/
#define LBITS(a) ((a)&BN_MASK2l)
#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
#define LLBITS(a) ((a)&BN_MASKl)
#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
#define mul64(l,h,bl,bh) \
{ \
BN_ULONG m,m1,lt,ht; \
\
lt=l; \
ht=h; \
m =(bh)*(lt); \
lt=(bl)*(lt); \
m1=(bl)*(ht); \
ht =(bh)*(ht); \
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
ht+=HBITS(m); \
m1=L2HBITS(m); \
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
(l)=lt; \
(h)=ht; \
}
#define sqr64(lo,ho,in) \
{ \
BN_ULONG l,h,m; \
\
h=(in); \
l=LBITS(h); \
h=HBITS(h); \
m =(l)*(h); \
l*=l; \
h*=h; \
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
l=(l+m)&BN_MASK2; if (l < m) h++; \
(lo)=l; \
(ho)=h; \
}
#define mul_add(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=(r); \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l; \
}
#define mul(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l&BN_MASK2; \
}
#define INIT_DATA_h0 0x67452301UL
#define INIT_DATA_h1 0xefcdab89UL
#define INIT_DATA_h2 0x98badcfeUL
#define INIT_DATA_h3 0x10325476UL
#define INIT_DATA_h4 0xc3d2e1f0UL
#define STATE_SIZE 1023
#define EXP_TABLE_SIZE 32
#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
#define BN_CTX_POOL_SIZE 16
#define MOD_EXP_CTIME_ALIGN(x_) ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
#define ENTROPY_NEEDED 32 /* require 256 bits = 32 bytes of randomness */
#define DH_FLAG_CACHE_MONT_P 0x01
#define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH
* implementation now uses constant time
* modular exponentiation for secret exponents
* by default. This flag causes the
* faster variable sliding window method to
* be used for all exponents.
*/
#define OPENSSL_DH_MAX_MODULUS_BITS 10000
struct dh_st
{
BIGNUM *p;
BIGNUM *g;
BIGNUM *pub_key; /* g^x */
BIGNUM *priv_key; /* x */
int flags;
};
struct env_md_st
{
int type;
int md_size;
unsigned long flags;
int (*init)(EVP_MD_CTX *ctx);
int (*update)(EVP_MD_CTX *ctx,const void *data,size_t count);
int (*final)(EVP_MD_CTX *ctx,unsigned char *md);
int ctx_size; /* how big does the ctx->md_data need to be */
} /* EVP_MD */;
struct env_md_ctx_st
{
const EVP_MD *digest;
void *md_data;
} /* EVP_MD_CTX */;
/* A bundle of bignums that can be linked with other bundles */
typedef struct bignum_pool_item
{
/* The bignum values */
BIGNUM vals[BN_CTX_POOL_SIZE];
/* Linked-list admin */
struct bignum_pool_item *prev, *next;
} BN_POOL_ITEM;
/* A linked-list of bignums grouped in bundles */
typedef struct bignum_pool
{
/* Linked-list admin */
BN_POOL_ITEM *head, *currentvar, *tail;
/* Stack depth and allocation size */
unsigned used, size;
} BN_POOL;
/*dh_key.h END*/
/*sha_locl.h START*/
#define SHA_LONG unsigned int
#define SHA_LBLOCK 16
#define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a contiguous array of 32 bit wide big-endian values. */
#define SHA_DIGEST_LENGTH 20
typedef struct SHAstate_st
{
SHA_LONG h0,h1,h2,h3,h4;
SHA_LONG Nl,Nh;
SHA_LONG data[SHA_LBLOCK];
unsigned int num;
} OPEN_SHA_CTX;
#define Xupdate(a,ix,ia,ib,ic,id) ((a)=(ia^ib^ic^id), ix=(a)=ROTATE((a),1))
#ifndef ROTATE
#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
#define HASH_MAKE_STRING(c,s) do { \
unsigned long ll; \
ll=(c)->h0; HOST_l2c(ll,(s)); \
ll=(c)->h1; HOST_l2c(ll,(s)); \
ll=(c)->h2; HOST_l2c(ll,(s)); \
ll=(c)->h3; HOST_l2c(ll,(s)); \
ll=(c)->h4; HOST_l2c(ll,(s)); \
} while (0)
#ifndef HOST_c2l
#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
l|=(((unsigned long)(*((c)++)))<<16), \
l|=(((unsigned long)(*((c)++)))<< 8), \
l|=(((unsigned long)(*((c)++))) ), \
l)
#endif
#define HOST_p_c2l(c,l,n) { \
switch (n) { \
case 0: l =((unsigned long)(*((c)++)))<<24; \
case 1: l|=((unsigned long)(*((c)++)))<<16; \
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
case 3: l|=((unsigned long)(*((c)++))); \
} }
#define HOST_p_c2l_p(c,l,sc,len) { \
switch (sc) { \
case 0: l =((unsigned long)(*((c)++)))<<24; \
if (--len == 0) break; \
case 1: l|=((unsigned long)(*((c)++)))<<16; \
if (--len == 0) break; \
case 2: l|=((unsigned long)(*((c)++)))<< 8; \
} }
/* NOTE the pointer is not incremented at the end of this */
#define HOST_c2l_p(c,l,n) { \
l=0; (c)+=n; \
switch (n) { \
case 3: l =((unsigned long)(*(--(c))))<< 8; \
case 2: l|=((unsigned long)(*(--(c))))<<16; \
case 1: l|=((unsigned long)(*(--(c))))<<24; \
} }
#ifndef HOST_l2c
#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
*((c)++)=(unsigned char)(((l) )&0xff), \
l)
#endif
#define K_00_19 0x5a827999UL
#define K_20_39 0x6ed9eba1UL
#define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL
/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
* simplified to the code in F_00_19. Wei attributes these optimisations
* to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel.
* #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
* I've just become aware of another tweak to be made, again from Wei Dai,
* in F_40_59, (x&a)|(y&a) -> (x|y)&a
*/
#define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))
#define F_20_39(b,c,d) ((b) ^ (c) ^ (d))
#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d)))
#define F_60_79(b,c,d) F_20_39(b,c,d)
#define BODY_00_15(xi) do { \
T=E+K_00_19+F_00_19(B,C,D); \
E=D, D=C, C=ROTATE(B,30), B=A; \
A=ROTATE(A,5)+T+xi; } while(0)
#define BODY_16_19(xa,xb,xc,xd) do { \
Xupdate(T,xa,xa,xb,xc,xd); \
T+=E+K_00_19+F_00_19(B,C,D); \
E=D, D=C, C=ROTATE(B,30), B=A; \
A=ROTATE(A,5)+T; } while(0)
#define BODY_20_39(xa,xb,xc,xd) do { \
Xupdate(T,xa,xa,xb,xc,xd); \
T+=E+K_20_39+F_20_39(B,C,D); \
E=D, D=C, C=ROTATE(B,30), B=A; \
A=ROTATE(A,5)+T; } while(0)
#define BODY_40_59(xa,xb,xc,xd) do { \
Xupdate(T,xa,xa,xb,xc,xd); \
T+=E+K_40_59+F_40_59(B,C,D); \
E=D, D=C, C=ROTATE(B,30), B=A; \
A=ROTATE(A,5)+T; } while(0)
#define BODY_60_79(xa,xb,xc,xd) do { \
Xupdate(T,xa,xa,xb,xc,xd); \
T=E+K_60_79+F_60_79(B,C,D); \
E=D, D=C, C=ROTATE(B,30), B=A; \
A=ROTATE(A,5)+T+xa; } while(0)
static void sha1_block_host_order (OPEN_SHA_CTX *c, const void *d, size_t num)
{
const SHA_LONG *W=d;
register unsigned long A,B,C,D,E,T;
int i;
SHA_LONG X[16];
A=c->h0;
B=c->h1;
C=c->h2;
D=c->h3;
E=c->h4;
for (;;)
{
for (i=0;i<16;i++)
{ X[i]=W[i]; BODY_00_15(X[i]); }
for (i=0;i<4;i++)
{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
for (;i<24;i++)
{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
for (i=0;i<20;i++)
{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
for (i=4;i<24;i++)
{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
c->h0=(c->h0+A)&0xffffffffL;
c->h1=(c->h1+B)&0xffffffffL;
c->h2=(c->h2+C)&0xffffffffL;
c->h3=(c->h3+D)&0xffffffffL;
c->h4=(c->h4+E)&0xffffffffL;
if (--num == 0) break;
A=c->h0;
B=c->h1;
C=c->h2;
D=c->h3;
E=c->h4;
W+=SHA_LBLOCK;
}
}
static void sha1_block_data_order (OPEN_SHA_CTX *c, const void *p, size_t num)
{
const unsigned char *data=p;
register unsigned long A,B,C,D,E,T,l;
int i;
SHA_LONG X[16];
A=c->h0;
B=c->h1;
C=c->h2;
D=c->h3;
E=c->h4;
for (;;)
{
for (i=0;i<16;i++)
{ HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); }
for (i=0;i<4;i++)
{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
for (;i<24;i++)
{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
for (i=0;i<20;i++)
{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
for (i=4;i<24;i++)
{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
c->h0=(c->h0+A)&0xffffffffL;
c->h1=(c->h1+B)&0xffffffffL;
c->h2=(c->h2+C)&0xffffffffL;
c->h3=(c->h3+D)&0xffffffffL;
c->h4=(c->h4+E)&0xffffffffL;
if (--num == 0) break;
A=c->h0;
B=c->h1;
C=c->h2;
D=c->h3;
E=c->h4;
}
}
/*sha_locl.h END*/
#define NID_sha1 64
static int g_state_num=0,g_state_index=0;
static unsigned char g_md[SHA_DIGEST_LENGTH];
static unsigned char g_state[STATE_SIZE+SHA_DIGEST_LENGTH];
static long g_md_count[2]={0,0};
static long g_entropy=0;
static int RAND_initialized=0;
static BIGNUM *bn_expand2(BIGNUM *b, int words);
static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c);
static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len);
static int HASH_INIT (OPEN_SHA_CTX *c);
static int RAND_poll(void);
static int RAND_init(EVP_MD_CTX *ctx)
{ return HASH_INIT(ctx->md_data); }
static int RAND_update(EVP_MD_CTX *ctx,const void *data,size_t count)
{ return HASH_UPDATE(ctx->md_data,data,count); }
static int RAND_final(EVP_MD_CTX *ctx,unsigned char *md)
{ return HASH_FINAL(md,ctx->md_data); }
static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
#ifdef BN_RECURSION
static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
int dna, int dnb, BN_ULONG *t);
static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
int tna, int tnb, BN_ULONG *t);
static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
#endif /* BN_RECURSION */
static int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont);
static int BN_rshift1(BIGNUM *r, const BIGNUM *a);
static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n);
static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n);
static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
static void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
static int HASH_FINAL (unsigned char *md, OPEN_SHA_CTX*c)
{
register SHA_LONG *p;
register unsigned long l;
register int i,j;
static const unsigned char end[4]={0x80,0x00,0x00,0x00};
const unsigned char *cp=end;
/* c->num should definitly have room for at least one more byte. */
p=c->data;
i=c->num>>2;
j=c->num&0x03;
l = (j==0) ? 0 : p[i];
HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
if (i>(SHA_LBLOCK-2)) /* save room for Nl and Nh */
{
if (i<SHA_LBLOCK) p[i]=0;
sha1_block_host_order (c,p,1);
i=0;
}
for (; i<(SHA_LBLOCK-2); i++)
p[i]=0;
p[SHA_LBLOCK-2]=c->Nh;
p[SHA_LBLOCK-1]=c->Nl;
sha1_block_host_order (c,p,1);
#ifndef HASH_MAKE_STRING
#error "HASH_MAKE_STRING must be defined!"
#else
HASH_MAKE_STRING(c,md);
#endif
c->num=0;
return 1;
}
static int HASH_UPDATE (OPEN_SHA_CTX *c, const void *data_, size_t len)
{
const unsigned char *data=data_;
register SHA_LONG * p;
register SHA_LONG l;
size_t sw,sc,ew,ec;
if (len==0) return 1;
l=(c->Nl+(((SHA_LONG)len)<<3))&0xffffffffUL;
/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
* Wei Dai <weidai@eskimo.com> for pointing it out. */
if (l < c->Nl) /* overflow */
c->Nh++;
c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */
c->Nl=l;
if (c->num != 0)
{
p=c->data;
sw=c->num>>2;
sc=c->num&0x03;
if ((c->num+len) >= SHA_CBLOCK)
{
l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
for (; sw<SHA_LBLOCK; sw++)
{
HOST_c2l(data,l); p[sw]=l;
}
sha1_block_host_order(c,p,1);
len-=(SHA_CBLOCK-c->num);
c->num=0;
/* drop through and do the rest */
}
else
{
c->num+=(unsigned int)len;
if ((sc+len) < 4) /* ugly, add char's to a word */
{
l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
}
else
{
ew=(c->num>>2);
ec=(c->num&0x03);
if (sc)
l=p[sw];
HOST_p_c2l(data,l,sc);
p[sw++]=l;
for (; sw < ew; sw++)
{
HOST_c2l(data,l); p[sw]=l;
}
if (ec)
{
HOST_c2l_p(data,l,ec); p[sw]=l;
}
}
return 1;
}
}
sw=len/SHA_CBLOCK;
if (sw > 0)
{
{
sha1_block_data_order(c,data,sw);
sw*=SHA_CBLOCK;
data+=sw;
len-=sw;
}
}
if (len!=0)
{
p = c->data;
c->num = len;
ew=len>>2; /* words to copy */
ec=len&0x03;
for (; ew; ew--,p++)
{
HOST_c2l(data,l); *p=l;
}
HOST_c2l_p(data,l,ec);
*p=l;
}
return 1;
}
static int HASH_INIT (OPEN_SHA_CTX *c)
{
c->h0=INIT_DATA_h0;
c->h1=INIT_DATA_h1;
c->h2=INIT_DATA_h2;
c->h3=INIT_DATA_h3;
c->h4=INIT_DATA_h4;
c->Nl=0;
c->Nh=0;
c->num=0;
return 1;
}
static BN_POOL *BN_CTX_new(void)
{
BN_POOL *ret;
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BN_POOL));
if(!ret)
{
return NULL;
}
/* Initialise the structure */
ret->head = ret->currentvar = ret->tail = NULL;
ret->used = ret->size = 0;
return ret;
}
static const BIGNUM *BN_value_one(void)
{
static BN_ULONG data_one=1L;
static BIGNUM const_one={&data_one,1,1,BN_FLG_STATIC_DATA};
return(&const_one);
}
static int BN_num_bits_word(BN_ULONG l)
{
static const char bits[256]={
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
{
if (l & 0xffff0000L)
{
if (l & 0xff000000L)
return(bits[(int)(l>>24L)]+24);
else return(bits[(int)(l>>16L)]+16);
}
else
{
if (l & 0xff00L)
return(bits[(int)(l>>8)]+8);
else
return(bits[(int)(l )] );
}
}
}
static int BN_num_bits(const BIGNUM *a)
{
int i = a->top - 1;
if (BN_is_zero(a)) return 0;
return ((i*BN_BITS2) + BN_num_bits_word(a->d[i]));
}
static void BN_clear_free(BIGNUM *a)
{
int i;
if (a == NULL) return;
if (a->d != NULL)
{
if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
os_free_mem(NULL, a->d);
}
i=BN_get_flags(a,BN_FLG_MALLOCED);
if (i)
os_free_mem(NULL, a);
}
static void BN_free(BIGNUM *a)
{
if (a == NULL) return;
if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
os_free_mem(NULL, a->d);
if (a->flags & BN_FLG_MALLOCED)
os_free_mem(NULL, a);
else
{
a->d = NULL;
}
}
static void BN_init(BIGNUM *a)
{
memset(a,0,sizeof(BIGNUM));
}
static BIGNUM *BN_new(void)
{
BIGNUM *ret;
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BIGNUM));
if (ret == NULL)
{
return(NULL);
}
ret->flags=BN_FLG_MALLOCED;
ret->top=0;
ret->dmax=0;
ret->d=NULL;
return(ret);
}
/* This is used both by bn_expand2() and bn_dup_expand() */
/* The caller MUST check that words > b->dmax before calling this */
static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
{
BN_ULONG *A,*a = NULL;
const BN_ULONG *B;
int i;
if (words > (INT_MAX/(4*BN_BITS2)))
{
return NULL;
}
if (BN_get_flags(b,BN_FLG_STATIC_DATA))
{
return(NULL);
}
os_alloc_mem(NULL, (UCHAR **)&A, sizeof(BN_ULONG)*words);
a = A;
if (A == NULL)
{
return(NULL);
}
B=b->d;
/* Check if the previous number needs to be copied */
if (B != NULL)
{
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
{
/*
* The fact that the loop is unrolled
* 4-wise is a tribute to Intel. It's
* the one that doesn't have enough
* registers to accomodate more data.
* I'd unroll it 8-wise otherwise:-)
*
* <appro@fy.chalmers.se>
*/
BN_ULONG a0,a1,a2,a3;
a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
}
switch (b->top&3)
{
case 3: A[2]=B[2];
case 2: A[1]=B[1];
case 1: A[0]=B[0];
case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
* the switch table by doing a=top&3; a--; goto jump_table[a];
* which fails for top== 0 */
;
}
}
return(a);
}
/* This is an internal function that should not be used in applications.
* It ensures that 'b' has enough room for a 'words' word number
* and initialises any unused part of b->d with leading zeros.
* It is mostly used by the various BIGNUM routines. If there is an error,
* NULL is returned. If not, 'b' is returned. */
static BIGNUM *bn_expand2(BIGNUM *b, int words)
{
if (words > b->dmax)
{
BN_ULONG *a = bn_expand_internal(b, words);
if(!a)
return NULL;
if(b->d)
os_free_mem(NULL, b->d);
b->d=a;
b->dmax=words;
}
return b;
}
static void BN_clear(BIGNUM *a)
{
if (a->d != NULL)
memset(a->d,0,a->dmax*sizeof(a->d[0]));
a->top=0;
}
static int BN_set_word(BIGNUM *a, BN_ULONG w)
{
if (NULL == ((1 <= (a)->dmax)?(a):bn_expand2((a),1)))
{
DHPRINT("BN_set_word NULL\n");
return(0);
}
a->d[0] = w;
a->top = (w ? 1 : 0);
return(1);
}
static BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
{
unsigned int i,m;
unsigned int n;
BN_ULONG l;
BIGNUM *bn = NULL;
if (ret == NULL)
ret = bn = BN_new();
if (ret == NULL) return(NULL);
l=0;
n=len;
if (n == 0)
{
ret->top=0;
return(ret);
}
i=((n-1)/BN_BYTES)+1;
m=((n-1)%(BN_BYTES));
if (bn_wexpand(ret, (int)i) == NULL)
{
BN_free(bn);
return NULL;
}
ret->top=i;
while (n--)
{
l=(l<<8L)| *(s++);
if (m-- == 0)
{
ret->d[--i]=l;
l=0;
m=BN_BYTES-1;
}
}
/* need to call this due to clear byte at top if avoiding
* having the top bit set (-ve number) */
bn_correct_top(ret);
return(ret);
}
static int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
{
int i;
BN_ULONG t1,t2,*ap,*bp;
i=a->top-b->top;
if (i != 0) return(i);
ap=a->d;
bp=b->d;
for (i=a->top-1; i>=0; i--)
{
t1= ap[i];
t2= bp[i];
if (t1 != t2)
return((t1 > t2) ? 1 : -1);
}
return(0);
}
/* ignore negative */
static int BN_bn2bin(const BIGNUM *a, unsigned char *to)
{
int n,i;
BN_ULONG l;
n=i=BN_num_bytes(a);
while (i--)
{
l=a->d[i/BN_BYTES];
*(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
}
return(n);
}
#ifdef BN_RECURSION
static int BN_cmp(const BIGNUM *a, const BIGNUM *b)
{
int i;
int gt,lt;
BN_ULONG t1,t2;
if ((a == NULL) || (b == NULL))
{
if (a != NULL)
return(-1);
else if (b != NULL)
return(1);
else
return(0);
}
{ gt=1; lt= -1; }
if (a->top > b->top) return(gt);
if (a->top < b->top) return(lt);
for (i=a->top-1; i>=0; i--)
{
t1=a->d[i];
t2=b->d[i];
if (t1 > t2) return(gt);
if (t1 < t2) return(lt);
}
return(0);
}
#endif /*BN_RECURSION */
static int BN_set_bit(BIGNUM *a, int n)
{
int i,j,k;
if (n < 0)
return 0;
i=n/BN_BITS2;
j=n%BN_BITS2;
if (a->top <= i)
{
if (bn_wexpand(a,i+1) == NULL) return(0);
for(k=a->top; k<i+1; k++)
a->d[k]=0;
a->top=i+1;
}
a->d[i]|=(((BN_ULONG)1)<<j);
return(1);
}
static int BN_is_bit_set(const BIGNUM *a, int n)
{
int i,j;
if (n < 0) return 0;
i=n/BN_BITS2;
j=n%BN_BITS2;
if (a->top <= i) return 0;
return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
}
#ifdef BN_RECURSION
static int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
{
int i;
BN_ULONG aa,bb;
aa=a[n-1];
bb=b[n-1];
if (aa != bb) return((aa > bb)?1:-1);
for (i=n-2; i>=0; i--)
{
aa=a[i];
bb=b[i];
if (aa != bb) return((aa > bb)?1:-1);
}
return(0);
}
/* Here follows a specialised variants of bn_cmp_words(). It has the
property of performing the operation on arrays of different sizes.
The sizes of those arrays is expressed through cl, which is the
common length ( basicall, min(len(a),len(b)) ), and dl, which is the
delta between the two lengths, calculated as len(a)-len(b).
All lengths are the number of BN_ULONGs... */
static int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl)
{
int n,i;
n = cl-1;
if (dl < 0)
{
for (i=dl; i<0; i++)
{
if (b[n-i] != 0)
return -1; /* a < b */
}
}
if (dl > 0)
{
for (i=dl; i>0; i--)
{
if (a[n+i] != 0)
return 1; /* a > b */
}
}
return bn_cmp_words(a,b,cl);
}
#endif /*BN_RECURSION */
static int BN_rshift1(BIGNUM *r, const BIGNUM *a)
{
BN_ULONG *ap,*rp,t,c;
int i;
if (BN_is_zero(a))
{
BN_zero(r);
return(1);
}
if (a != r)
{
if (bn_wexpand(r,a->top) == NULL) return(0);
r->top=a->top;
}
ap=a->d;
rp=r->d;
c=0;
for (i=a->top-1; i>=0; i--)
{
t=ap[i];
rp[i]=((t>>1)&BN_MASK2)|c;
c=(t&1)?BN_TBIT:0;
}
bn_correct_top(r);
return(1);
}
static int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
{
int i,nw,lb,rb;
BN_ULONG *t,*f;
BN_ULONG l;
nw=n/BN_BITS2;
if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
lb=n%BN_BITS2;
rb=BN_BITS2-lb;
f=a->d;
t=r->d;
t[a->top+nw]=0;
if (lb == 0)
for (i=a->top-1; i>=0; i--)
t[nw+i]=f[i];
else
for (i=a->top-1; i>=0; i--)
{
l=f[i];
t[nw+i+1]|=(l>>rb)&BN_MASK2;
t[nw+i]=(l<<lb)&BN_MASK2;
}
memset(t,0,nw*sizeof(t[0]));
/* for (i=0; i<nw; i++)
t[i]=0;*/
r->top=a->top+nw+1;
bn_correct_top(r);
return(1);
}
static int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
{
int i,j,nw,lb,rb;
BN_ULONG *t,*f;
BN_ULONG l,tmp;
nw=n/BN_BITS2;
rb=n%BN_BITS2;
lb=BN_BITS2-rb;
if (nw > a->top || a->top == 0)
{
BN_zero(r);
return(1);
}
if (r != a)
{
if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
}
else
{
if (n == 0)
return 1; /* or the copying loop will go berserk */
}
f= &(a->d[nw]);
t=r->d;
j=a->top-nw;
r->top=j;
if (rb == 0)
{
for (i=j; i != 0; i--)
*(t++)= *(f++);
}
else
{
l= *(f++);
for (i=j-1; i != 0; i--)
{
tmp =(l>>rb)&BN_MASK2;
l= *(f++);
*(t++) =(tmp|(l<<lb))&BN_MASK2;
}
*(t++) =(l>>rb)&BN_MASK2;
}
bn_correct_top(r);
return(1);
}
static void BN_POOL_release(BN_POOL *p, unsigned int num)
{
unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
p->used -= num;
while(num--)
{
if(!offset)
{
offset = BN_CTX_POOL_SIZE - 1;
p->currentvar = p->currentvar->prev;
}
else
offset--;
}
}
static void BN_POOL_finish(BN_POOL *p)
{
while(p->head)
{
BIGNUM *bn=p->head->vals;
unsigned int loop = 0;
while(loop++ < BN_CTX_POOL_SIZE)
{
BIGNUM *tmp = bn; /* avoid compile warning: use freed pointer */
if(bn->d) BN_clear_free(tmp);
bn++;
}
p->currentvar = p->head->next;
os_free_mem(NULL, p->head);
p->head = p->currentvar;
}
}
static BIGNUM *BN_POOL_get(BN_POOL *p)
{
BIGNUM *ret=NULL;
if(p->used == p->size)
{
BIGNUM *bn;
unsigned int loop = 0;
BN_POOL_ITEM *item;
os_alloc_mem(NULL, (UCHAR **)&item, sizeof(BN_POOL_ITEM));
if(!item) goto end;
/* Initialise the structure */
bn = item->vals;
while(loop++ < BN_CTX_POOL_SIZE)
BN_init(bn++);
item->prev = p->tail;
item->next = NULL;
/* Link it in */
if(!p->head)
p->head = p->currentvar = p->tail = item;
else
{
p->tail->next = item;
p->tail = item;
p->currentvar = item;
}
p->size += BN_CTX_POOL_SIZE;
p->used++;
/* Return the first bignum from the new pool */
ret = item->vals;
}
if(!p->used)
p->currentvar = p->head;
else if((p->used % BN_CTX_POOL_SIZE) == 0)
p->currentvar = p->currentvar->next;
ret = p->currentvar->vals + ((p->used) % BN_CTX_POOL_SIZE);
p->used++;
end:
if(ret == NULL)
{
DHPRINT("BN_POOL_get ERROR !!!");
}
else
{
/* OK, make sure the returned bignum is "zero" */
BN_zero(ret);
}
return ret;
}
static BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
{
int i;
BN_ULONG *A;
const BN_ULONG *B;
if (0 == BN_ucmp(a,b)) return(a);
if (bn_wexpand(a,b->top) == NULL) return(NULL);
A=a->d;
B=b->d;
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
{
BN_ULONG a0,a1,a2,a3;
a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
}
switch (b->top&3)
{
case 3: A[2]=B[2];
case 2: A[1]=B[1];
case 1: A[0]=B[0];
case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
}
a->top=b->top;
return(a);
}
/* Divide h,l by d and return the result. */
/* I need to test this some more :-( */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
BN_ULONG dh,dl,q,ret=0,th,tl,t;
int i,count=2;
if (d == 0) return(BN_MASK2);
i=BN_num_bits_word(d);
i=BN_BITS2-i;
if (h >= d) h-=d;
if (i)
{
d<<=i;
h=(h<<i)|(l>>(BN_BITS2-i));
l<<=i;
}
dh=(d&BN_MASK2h)>>BN_BITS4;
dl=(d&BN_MASK2l);
for (;;)
{
if ((h>>BN_BITS4) == dh)
q=BN_MASK2l;
else
q=h/dh;
th=q*dh;
tl=dl*q;
for (;;)
{
t=h-th;
if ((t&BN_MASK2h) ||
((tl) <= (
(t<<BN_BITS4)|
((l&BN_MASK2h)>>BN_BITS4))))
break;
q--;
th-=dh;
tl-=dl;
}
t=(tl>>BN_BITS4);
tl=(tl<<BN_BITS4)&BN_MASK2h;
th+=t;
if (l < tl) th++;
l-=tl;
if (h < th)
{
h+=d;
q--;
}
h-=th;
if (--count == 0) break;
ret=q<<BN_BITS4;
h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
l=(l&BN_MASK2l)<<BN_BITS4;
}
ret|=q;
return(ret);
}
/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
* rm such that dv*divisor + rm = num holds.
* If 'dv' or 'rm' is NULL, the respective value is not returned.
*/
static int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
BN_POOL *ctx)
{
int release=0;
int norm_shift,i,loop;
BIGNUM *tmp,wnum,*snum,*sdiv,*res;
BN_ULONG *resp,*wnump;
BN_ULONG d0,d1;
int num_n,div_n;
if (BN_is_zero(divisor))
{
return(0);
}
if (BN_ucmp(num,divisor) < 0)
{
if (rm != NULL)
{ if (BN_copy(rm,num) == NULL) return(0); }
if (dv != NULL) BN_zero(dv);
return(1);
}
tmp=BN_POOL_get(ctx);
snum=BN_POOL_get(ctx);
sdiv=BN_POOL_get(ctx);
release+=3;
if (dv == NULL)
{res=BN_POOL_get(ctx);release++;}
else res=dv;
if (sdiv == NULL || res == NULL) goto err;
/* First we normalise the numbers */
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
norm_shift+=BN_BITS2;
if (!(BN_lshift(snum,num,norm_shift))) goto err;
div_n=sdiv->top;
num_n=snum->top;
loop=num_n-div_n;
/* Lets setup a 'window' into snum
* This is the part that corresponds to the current
* 'area' being divided */
wnum.d = &(snum->d[loop]);
wnum.top = div_n;
/* only needed when BN_ucmp messes up the values between top and max */
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
/* Get the top 2 words of sdiv */
/* div_n=sdiv->top; */
d0=sdiv->d[div_n-1];
d1=(div_n == 1)?0:sdiv->d[div_n-2];
/* pointer to the 'top' of snum */
wnump= &(snum->d[num_n-1]);
/* Setup to 'res' */
if (!bn_wexpand(res,(loop+1))) goto err;
res->top=loop;
resp= &(res->d[loop-1]);
/* space for temp */
if (!bn_wexpand(tmp,(div_n+1))) goto err;
if (BN_ucmp(&wnum,sdiv) >= 0)
{
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
*resp=1;
}
else
res->top--;
/* if res->top == 0 then clear the neg value otherwise decrease
* the resp pointer */
if (res->top != 0)
resp--;
for (i=0; i<loop-1; i++, wnump--, resp--)
{
BN_ULONG q,l0;
/* the first part of the loop uses the top two words of
* snum and sdiv to calculate a BN_ULONG q such that
* | wnum - sdiv * q | < sdiv */
BN_ULONG n0,n1,rem=0;
n0=wnump[0];
n1=wnump[-1];
if (n0 == d0)
q=BN_MASK2;
else /* n0 < d0 */
{
BN_ULONG t2l,t2h,ql,qh;
q=bn_div_words(n0,n1,d0);
rem=(n1-q*d0)&BN_MASK2;
t2l=LBITS(d1); t2h=HBITS(d1);
ql =LBITS(q); qh =HBITS(q);
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
for (;;)
{
if ((t2h < rem) ||
((t2h == rem) && (t2l <= wnump[-2])))
break;
q--;
rem += d0;
if (rem < d0) break; /* don't let rem overflow */
if (t2l < d1) t2h--; t2l -= d1;
}
}
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
tmp->d[div_n]=l0;
wnum.d--;
/* ingore top values of the bignums just sub the two
* BN_ULONG arrays with bn_sub_words */
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
{
/* Note: As we have considered only the leading
* two BN_ULONGs in the calculation of q, sdiv * q
* might be greater than wnum (but then (q-1) * sdiv
* is less or equal than wnum)
*/
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
/* we can't have an overflow here (assuming
* that q != 0, but if q == 0 then tmp is
* zero anyway) */
(*wnump)++;
}
/* store part of the result */
*resp = q;
}
bn_correct_top(snum);
if (rm != NULL)
{
/* Keep a copy of the neg flag in num because if rm==num
* BN_rshift() will overwrite it.
*/
BN_rshift(rm,snum,norm_shift);
}
BN_POOL_release(ctx,release);
return(1);
err:
BN_POOL_release(ctx,release);
return(0);
}
/*bn_div END*/
/*bn_asm START*/
static BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG c=0;
BN_ULONG bl,bh;
if (num <= 0) return((BN_ULONG)0);
bl=LBITS(w);
bh=HBITS(w);
for (;;)
{
mul_add(rp[0],ap[0],bl,bh,c);
if (--num == 0) break;
mul_add(rp[1],ap[1],bl,bh,c);
if (--num == 0) break;
mul_add(rp[2],ap[2],bl,bh,c);
if (--num == 0) break;
mul_add(rp[3],ap[3],bl,bh,c);
if (--num == 0) break;
ap+=4;
rp+=4;
}
return(c);
}
static BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG carry=0;
BN_ULONG bl,bh;
if (num <= 0) return((BN_ULONG)0);
bl=LBITS(w);
bh=HBITS(w);
for (;;)
{
mul(rp[0],ap[0],bl,bh,carry);
if (--num == 0) break;
mul(rp[1],ap[1],bl,bh,carry);
if (--num == 0) break;
mul(rp[2],ap[2],bl,bh,carry);
if (--num == 0) break;
mul(rp[3],ap[3],bl,bh,carry);
if (--num == 0) break;
ap+=4;
rp+=4;
}
return(carry);
}
static void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
if (n <= 0) return;
for (;;)
{
sqr64(r[0],r[1],a[0]);
if (--n == 0) break;
sqr64(r[2],r[3],a[1]);
if (--n == 0) break;
sqr64(r[4],r[5],a[2]);
if (--n == 0) break;
sqr64(r[6],r[7],a[3]);
if (--n == 0) break;
a+=4;
r+=8;
}
}
static BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
BN_ULONG c,l,t;
if (n <= 0) return((BN_ULONG)0);
c=0;
for (;;)
{
t=a[0];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[0])&BN_MASK2;
c+=(l < t);
r[0]=l;
if (--n <= 0) break;
t=a[1];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[1])&BN_MASK2;
c+=(l < t);
r[1]=l;
if (--n <= 0) break;
t=a[2];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[2])&BN_MASK2;
c+=(l < t);
r[2]=l;
if (--n <= 0) break;
t=a[3];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[3])&BN_MASK2;
c+=(l < t);
r[3]=l;
if (--n <= 0) break;
a+=4;
b+=4;
r+=4;
}
return((BN_ULONG)c);
}
static BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
BN_ULONG t1,t2;
int c=0;
if (n <= 0) return((BN_ULONG)0);
for (;;)
{
t1=a[0]; t2=b[0];
r[0]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
if (--n <= 0) break;
t1=a[1]; t2=b[1];
r[1]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
if (--n <= 0) break;
t1=a[2]; t2=b[2];
r[2]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
if (--n <= 0) break;
t1=a[3]; t2=b[3];
r[3]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
if (--n <= 0) break;
a+=4;
b+=4;
r+=4;
}
return(c);
}
#ifdef BN_MUL_COMBA
#undef bn_mul_comba8
#undef bn_mul_comba4
#undef bn_sqr_comba8
#undef bn_sqr_comba4
/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
#define mul_add_c(a,b,c0,c1,c2) \
t1=LBITS(a); t2=HBITS(a); \
bl=LBITS(b); bh=HBITS(b); \
mul64(t1,t2,bl,bh); \
c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
#define mul_add_c2(a,b,c0,c1,c2) \
t1=LBITS(a); t2=HBITS(a); \
bl=LBITS(b); bh=HBITS(b); \
mul64(t1,t2,bl,bh); \
if (t2 & BN_TBIT) c2++; \
t2=(t2+t2)&BN_MASK2; \
if (t1 & BN_TBIT) t2++; \
t1=(t1+t1)&BN_MASK2; \
c0=(c0+t1)&BN_MASK2; \
if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
#define sqr_add_c(a,i,c0,c1,c2) \
sqr64(t1,t2,(a)[i]); \
c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
#define sqr_add_c2(a,i,j,c0,c1,c2) \
mul_add_c2((a)[i],(a)[j],c0,c1,c2)
static void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
c1=0;
c2=0;
c3=0;
mul_add_c(a[0],b[0],c1,c2,c3);
r[0]=c1;
c1=0;
mul_add_c(a[0],b[1],c2,c3,c1);
mul_add_c(a[1],b[0],c2,c3,c1);
r[1]=c2;
c2=0;
mul_add_c(a[2],b[0],c3,c1,c2);
mul_add_c(a[1],b[1],c3,c1,c2);
mul_add_c(a[0],b[2],c3,c1,c2);
r[2]=c3;
c3=0;
mul_add_c(a[0],b[3],c1,c2,c3);
mul_add_c(a[1],b[2],c1,c2,c3);
mul_add_c(a[2],b[1],c1,c2,c3);
mul_add_c(a[3],b[0],c1,c2,c3);
r[3]=c1;
c1=0;
mul_add_c(a[4],b[0],c2,c3,c1);
mul_add_c(a[3],b[1],c2,c3,c1);
mul_add_c(a[2],b[2],c2,c3,c1);
mul_add_c(a[1],b[3],c2,c3,c1);
mul_add_c(a[0],b[4],c2,c3,c1);
r[4]=c2;
c2=0;
mul_add_c(a[0],b[5],c3,c1,c2);
mul_add_c(a[1],b[4],c3,c1,c2);
mul_add_c(a[2],b[3],c3,c1,c2);
mul_add_c(a[3],b[2],c3,c1,c2);
mul_add_c(a[4],b[1],c3,c1,c2);
mul_add_c(a[5],b[0],c3,c1,c2);
r[5]=c3;
c3=0;
mul_add_c(a[6],b[0],c1,c2,c3);
mul_add_c(a[5],b[1],c1,c2,c3);
mul_add_c(a[4],b[2],c1,c2,c3);
mul_add_c(a[3],b[3],c1,c2,c3);
mul_add_c(a[2],b[4],c1,c2,c3);
mul_add_c(a[1],b[5],c1,c2,c3);
mul_add_c(a[0],b[6],c1,c2,c3);
r[6]=c1;
c1=0;
mul_add_c(a[0],b[7],c2,c3,c1);
mul_add_c(a[1],b[6],c2,c3,c1);
mul_add_c(a[2],b[5],c2,c3,c1);
mul_add_c(a[3],b[4],c2,c3,c1);
mul_add_c(a[4],b[3],c2,c3,c1);
mul_add_c(a[5],b[2],c2,c3,c1);
mul_add_c(a[6],b[1],c2,c3,c1);
mul_add_c(a[7],b[0],c2,c3,c1);
r[7]=c2;
c2=0;
mul_add_c(a[7],b[1],c3,c1,c2);
mul_add_c(a[6],b[2],c3,c1,c2);
mul_add_c(a[5],b[3],c3,c1,c2);
mul_add_c(a[4],b[4],c3,c1,c2);
mul_add_c(a[3],b[5],c3,c1,c2);
mul_add_c(a[2],b[6],c3,c1,c2);
mul_add_c(a[1],b[7],c3,c1,c2);
r[8]=c3;
c3=0;
mul_add_c(a[2],b[7],c1,c2,c3);
mul_add_c(a[3],b[6],c1,c2,c3);
mul_add_c(a[4],b[5],c1,c2,c3);
mul_add_c(a[5],b[4],c1,c2,c3);
mul_add_c(a[6],b[3],c1,c2,c3);
mul_add_c(a[7],b[2],c1,c2,c3);
r[9]=c1;
c1=0;
mul_add_c(a[7],b[3],c2,c3,c1);
mul_add_c(a[6],b[4],c2,c3,c1);
mul_add_c(a[5],b[5],c2,c3,c1);
mul_add_c(a[4],b[6],c2,c3,c1);
mul_add_c(a[3],b[7],c2,c3,c1);
r[10]=c2;
c2=0;
mul_add_c(a[4],b[7],c3,c1,c2);
mul_add_c(a[5],b[6],c3,c1,c2);
mul_add_c(a[6],b[5],c3,c1,c2);
mul_add_c(a[7],b[4],c3,c1,c2);
r[11]=c3;
c3=0;
mul_add_c(a[7],b[5],c1,c2,c3);
mul_add_c(a[6],b[6],c1,c2,c3);
mul_add_c(a[5],b[7],c1,c2,c3);
r[12]=c1;
c1=0;
mul_add_c(a[6],b[7],c2,c3,c1);
mul_add_c(a[7],b[6],c2,c3,c1);
r[13]=c2;
c2=0;
mul_add_c(a[7],b[7],c3,c1,c2);
r[14]=c3;
r[15]=c1;
}
#ifdef BN_RECURSION
static void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
c1=0;
c2=0;
c3=0;
mul_add_c(a[0],b[0],c1,c2,c3);
r[0]=c1;
c1=0;
mul_add_c(a[0],b[1],c2,c3,c1);
mul_add_c(a[1],b[0],c2,c3,c1);
r[1]=c2;
c2=0;
mul_add_c(a[2],b[0],c3,c1,c2);
mul_add_c(a[1],b[1],c3,c1,c2);
mul_add_c(a[0],b[2],c3,c1,c2);
r[2]=c3;
c3=0;
mul_add_c(a[0],b[3],c1,c2,c3);
mul_add_c(a[1],b[2],c1,c2,c3);
mul_add_c(a[2],b[1],c1,c2,c3);
mul_add_c(a[3],b[0],c1,c2,c3);
r[3]=c1;
c1=0;
mul_add_c(a[3],b[1],c2,c3,c1);
mul_add_c(a[2],b[2],c2,c3,c1);
mul_add_c(a[1],b[3],c2,c3,c1);
r[4]=c2;
c2=0;
mul_add_c(a[2],b[3],c3,c1,c2);
mul_add_c(a[3],b[2],c3,c1,c2);
r[5]=c3;
c3=0;
mul_add_c(a[3],b[3],c1,c2,c3);
r[6]=c1;
r[7]=c2;
}
#endif /*BN_RECURSION */
static void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
c1=0;
c2=0;
c3=0;
sqr_add_c(a,0,c1,c2,c3);
r[0]=c1;
c1=0;
sqr_add_c2(a,1,0,c2,c3,c1);
r[1]=c2;
c2=0;
sqr_add_c(a,1,c3,c1,c2);
sqr_add_c2(a,2,0,c3,c1,c2);
r[2]=c3;
c3=0;
sqr_add_c2(a,3,0,c1,c2,c3);
sqr_add_c2(a,2,1,c1,c2,c3);
r[3]=c1;
c1=0;
sqr_add_c(a,2,c2,c3,c1);
sqr_add_c2(a,3,1,c2,c3,c1);
sqr_add_c2(a,4,0,c2,c3,c1);
r[4]=c2;
c2=0;
sqr_add_c2(a,5,0,c3,c1,c2);
sqr_add_c2(a,4,1,c3,c1,c2);
sqr_add_c2(a,3,2,c3,c1,c2);
r[5]=c3;
c3=0;
sqr_add_c(a,3,c1,c2,c3);
sqr_add_c2(a,4,2,c1,c2,c3);
sqr_add_c2(a,5,1,c1,c2,c3);
sqr_add_c2(a,6,0,c1,c2,c3);
r[6]=c1;
c1=0;
sqr_add_c2(a,7,0,c2,c3,c1);
sqr_add_c2(a,6,1,c2,c3,c1);
sqr_add_c2(a,5,2,c2,c3,c1);
sqr_add_c2(a,4,3,c2,c3,c1);
r[7]=c2;
c2=0;
sqr_add_c(a,4,c3,c1,c2);
sqr_add_c2(a,5,3,c3,c1,c2);
sqr_add_c2(a,6,2,c3,c1,c2);
sqr_add_c2(a,7,1,c3,c1,c2);
r[8]=c3;
c3=0;
sqr_add_c2(a,7,2,c1,c2,c3);
sqr_add_c2(a,6,3,c1,c2,c3);
sqr_add_c2(a,5,4,c1,c2,c3);
r[9]=c1;
c1=0;
sqr_add_c(a,5,c2,c3,c1);
sqr_add_c2(a,6,4,c2,c3,c1);
sqr_add_c2(a,7,3,c2,c3,c1);
r[10]=c2;
c2=0;
sqr_add_c2(a,7,4,c3,c1,c2);
sqr_add_c2(a,6,5,c3,c1,c2);
r[11]=c3;
c3=0;
sqr_add_c(a,6,c1,c2,c3);
sqr_add_c2(a,7,5,c1,c2,c3);
r[12]=c1;
c1=0;
sqr_add_c2(a,7,6,c2,c3,c1);
r[13]=c2;
c2=0;
sqr_add_c(a,7,c3,c1,c2);
r[14]=c3;
r[15]=c1;
}
static void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
c1=0;
c2=0;
c3=0;
sqr_add_c(a,0,c1,c2,c3);
r[0]=c1;
c1=0;
sqr_add_c2(a,1,0,c2,c3,c1);
r[1]=c2;
c2=0;
sqr_add_c(a,1,c3,c1,c2);
sqr_add_c2(a,2,0,c3,c1,c2);
r[2]=c3;
c3=0;
sqr_add_c2(a,3,0,c1,c2,c3);
sqr_add_c2(a,2,1,c1,c2,c3);
r[3]=c1;
c1=0;
sqr_add_c(a,2,c2,c3,c1);
sqr_add_c2(a,3,1,c2,c3,c1);
r[4]=c2;
c2=0;
sqr_add_c2(a,3,2,c3,c1,c2);
r[5]=c3;
c3=0;
sqr_add_c(a,3,c1,c2,c3);
r[6]=c1;
r[7]=c2;
}
#endif /* !BN_MUL_COMBA */
/*bn_asm END*/
/*bn_mul START*/
/* Here follows specialised variants of bn_add_words() and
bn_sub_words(). They have the property performing operations on
arrays of different sizes. The sizes of those arrays is expressed through
cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
which is the delta between the two lengths, calculated as len(a)-len(b).
All lengths are the number of BN_ULONGs... For the operations that require
a result array as parameter, it must have the length cl+abs(dl).
These functions should probably end up in bn_asm.c as soon as there are
assembler counterparts for the systems that use assembler files. */
#ifdef BN_RECURSION
static BN_ULONG bn_sub_part_words(BN_ULONG *r,
const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl)
{
BN_ULONG c, t;
c = bn_sub_words(r, a, b, cl);
if (dl == 0)
return c;
r += cl;
a += cl;
b += cl;
if (dl < 0)
{
for (;;)
{
t = b[0];
r[0] = (0-t-c)&BN_MASK2;
if (t != 0) c=1;
if (++dl >= 0) break;
t = b[1];
r[1] = (0-t-c)&BN_MASK2;
if (t != 0) c=1;
if (++dl >= 0) break;
t = b[2];
r[2] = (0-t-c)&BN_MASK2;
if (t != 0) c=1;
if (++dl >= 0) break;
t = b[3];
r[3] = (0-t-c)&BN_MASK2;
if (t != 0) c=1;
if (++dl >= 0) break;
b += 4;
r += 4;
}
}
else
{
int save_dl = dl;
while(c)
{
t = a[0];
r[0] = (t-c)&BN_MASK2;
if (t != 0) c=0;
if (--dl <= 0) break;
t = a[1];
r[1] = (t-c)&BN_MASK2;
if (t != 0) c=0;
if (--dl <= 0) break;
t = a[2];
r[2] = (t-c)&BN_MASK2;
if (t != 0) c=0;
if (--dl <= 0) break;
t = a[3];
r[3] = (t-c)&BN_MASK2;
if (t != 0) c=0;
if (--dl <= 0) break;
save_dl = dl;
a += 4;
r += 4;
}
if (dl > 0)
{
if (save_dl > dl)
{
switch (save_dl - dl)
{
case 1:
r[1] = a[1];
if (--dl <= 0) break;
case 2:
r[2] = a[2];
if (--dl <= 0) break;
case 3:
r[3] = a[3];
if (--dl <= 0) break;
}
a += 4;
r += 4;
}
}
if (dl > 0)
{
for(;;)
{
r[0] = a[0];
if (--dl <= 0) break;
r[1] = a[1];
if (--dl <= 0) break;
r[2] = a[2];
if (--dl <= 0) break;
r[3] = a[3];
if (--dl <= 0) break;
a += 4;
r += 4;
}
}
}
return c;
}
/* Karatsuba recursive multiplication algorithm
* (cf. Knuth, The Art of Computer Programming, Vol. 2) */
/* r is 2*n2 words in size,
* a and b are both n2 words in size.
* n2 must be a power of 2.
* We multiply and return the result.
* t must be 2*n2 words in size
* We calculate
* a[0]*b[0]
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
* a[1]*b[1]
*/
static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
int dna, int dnb, BN_ULONG *t)
{
int n=n2/2,c1,c2;
int tna=n+dna, tnb=n+dnb;
unsigned int neg,zero;
BN_ULONG ln,lo,*p;
# ifdef BN_MUL_COMBA
/* Only call bn_mul_comba 8 if n2 == 8 and the
* two arrays are complete [steve]
*/
if (n2 == 8 && dna == 0 && dnb == 0)
{
bn_mul_comba8(r,a,b);
return;
}
# endif /* BN_MUL_COMBA */
/* Else do normal multiply */
if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
{
bn_mul_normal(r,a,n2+dna,b,n2+dnb);
if ((dna + dnb) < 0)
memset(&r[2*n2 + dna + dnb], 0,
sizeof(BN_ULONG) * -(dna + dnb));
return;
}
/* r=(a[0]-a[1])*(b[1]-b[0]) */
c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
zero=neg=0;
switch (c1*3+c2)
{
case -4:
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
break;
case -3:
zero=1;
break;
case -2:
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
neg=1;
break;
case -1:
case 0:
case 1:
zero=1;
break;
case 2:
bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
neg=1;
break;
case 3:
zero=1;
break;
case 4:
bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
break;
}
# ifdef BN_MUL_COMBA
if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
extra args to do this well */
{
if (!zero)
bn_mul_comba4(&(t[n2]),t,&(t[n]));
else
memset(&(t[n2]),0,8*sizeof(BN_ULONG));
bn_mul_comba4(r,a,b);
bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
}
else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
take extra args to do this
well */
{
if (!zero)
bn_mul_comba8(&(t[n2]),t,&(t[n]));
else
memset(&(t[n2]),0,16*sizeof(BN_ULONG));
bn_mul_comba8(r,a,b);
bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
}
else
# endif /* BN_MUL_COMBA */
{
p= &(t[n2*2]);
if (!zero)
bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
else
memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
bn_mul_recursive(r,a,b,n,0,0,p);
bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
}
/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
*/
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
if (neg) /* if t[32] is negative */
{
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
}
else
{
/* Might have a carry */
c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
}
/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
* c1 holds the carry bits
*/
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
if (c1)
{
p= &(r[n+n2]);
lo= *p;
ln=(lo+c1)&BN_MASK2;
*p=ln;
/* The overflow will stop before we over write
* words we should not overwrite */
if (ln < (BN_ULONG)c1)
{
do {
p++;
lo= *p;
ln=(lo+1)&BN_MASK2;
*p=ln;
} while (ln == 0);
}
}
}
/* n+tn is the word length
* t needs to be n*4 is size, as does r */
static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
int tna, int tnb, BN_ULONG *t)
{
int i,j,n2=n*2;
int c1,c2,neg,zero;
BN_ULONG ln,lo,*p;
if (n < 8)
{
bn_mul_normal(r,a,n+tna,b,n+tnb);
return;
}
/* r=(a[0]-a[1])*(b[1]-b[0]) */
c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
zero=neg=0;
switch (c1*3+c2)
{
case -4:
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
break;
case -3:
zero=1;
/* break; */
case -2:
bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
neg=1;
break;
case -1:
case 0:
case 1:
zero=1;
/* break; */
case 2:
bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
neg=1;
break;
case 3:
zero=1;
/* break; */
case 4:
bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
break;
}
if (n == 8)
{
bn_mul_comba8(&(t[n2]),t,&(t[n]));
bn_mul_comba8(r,a,b);
bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
}
else
{
p= &(t[n2*2]);
bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
bn_mul_recursive(r,a,b,n,0,0,p);
i=n/2;
/* If there is only a bottom half to the number,
* just do it */
if (tna > tnb)
j = tna - i;
else
j = tnb - i;
if (j == 0)
{
bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
i,tna-i,tnb-i,p);
memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
}
else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
{
bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
i,tna-i,tnb-i,p);
memset(&(r[n2+tna+tnb]),0,
sizeof(BN_ULONG)*(n2-tna-tnb));
}
else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
{
memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
{
bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
}
else
{
for (;;)
{
i/=2;
if (i < tna && i < tnb)
{
bn_mul_part_recursive(&(r[n2]),
&(a[n]),&(b[n]),
i,tna-i,tnb-i,p);
break;
}
else if (i <= tna && i <= tnb)
{
bn_mul_recursive(&(r[n2]),
&(a[n]),&(b[n]),
i,tna-i,tnb-i,p);
break;
}
}
}
}
}
/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
*/
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
if (neg) /* if t[32] is negative */
{
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
}
else
{
/* Might have a carry */
c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
}
/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
* c1 holds the carry bits
*/
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
if (c1)
{
p= &(r[n+n2]);
lo= *p;
ln=(lo+c1)&BN_MASK2;
*p=ln;
/* The overflow will stop before we over write
* words we should not overwrite */
if (ln < (BN_ULONG)c1)
{
do {
p++;
lo= *p;
ln=(lo+1)&BN_MASK2;
*p=ln;
} while (ln == 0);
}
}
}
#endif /* BN_RECURSION */
static int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_POOL *ctx)
{
int release=0;
int ret=0;
int top,al,bl;
BIGNUM *rr;
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
int i;
#endif
#ifdef BN_RECURSION
BIGNUM *t=NULL;
int j=0,k;
#endif
al=a->top;
bl=b->top;
if ((al == 0) || (bl == 0))
{
BN_zero(r);
return(1);
}
top=al+bl;
if ((r == a) || (r == b))
{
if ((rr = BN_POOL_get(ctx)) == NULL) goto err;
release++;
}
else
rr = r;
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
i = al-bl;
#endif
#ifdef BN_MUL_COMBA
if (i == 0)
{
if (al == 8)
{
if (bn_wexpand(rr,16) == NULL) goto err;
rr->top=16;
bn_mul_comba8(rr->d,a->d,b->d);
goto end;
}
}
#endif /* BN_MUL_COMBA */
#ifdef BN_RECURSION
if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
{
if (i >= -1 && i <= 1)
{
int sav_j =0;
/* Find out the power of two lower or equal
to the longest of the two numbers */
if (i >= 0)
{
j = BN_num_bits_word((BN_ULONG)al);
}
if (i == -1)
{
j = BN_num_bits_word((BN_ULONG)bl);
}
sav_j = j;
j = 1<<(j-1);
k = j+j;
t = BN_POOL_get(ctx);
release++;
if (al > j || bl > j)
{
bn_wexpand(t,k*4);
bn_wexpand(rr,k*4);
bn_mul_part_recursive(rr->d,a->d,b->d,
j,al-j,bl-j,t->d);
}
else /* al <= j || bl <= j */
{
bn_wexpand(t,k*2);
bn_wexpand(rr,k*2);
bn_mul_recursive(rr->d,a->d,b->d,
j,al-j,bl-j,t->d);
}
rr->top=top;
goto end;
}
}
#endif /* BN_RECURSION */
if (bn_wexpand(rr,top) == NULL) goto err;
rr->top=top;
bn_mul_normal(rr->d,a->d,al,b->d,bl);
#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
end:
#endif
bn_correct_top(rr);
if (r != rr) BN_copy(r,rr);
ret=1;
err:
BN_POOL_release(ctx,release);
return(ret);
}
static void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
{
BN_ULONG *rr;
if (na < nb)
{
int itmp;
BN_ULONG *ltmp;
itmp=na; na=nb; nb=itmp;
ltmp=a; a=b; b=ltmp;
}
rr= &(r[na]);
if (nb <= 0)
{
(void)bn_mul_words(r,a,na,0);
return;
}
else
rr[0]=bn_mul_words(r,a,na,b[0]);
for (;;)
{
if (--nb <= 0) return;
rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
if (--nb <= 0) return;
rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
if (--nb <= 0) return;
rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
if (--nb <= 0) return;
rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
rr+=4;
r+=4;
b+=4;
}
}
/*bn_mul END*/
/* tmp must have 2*n words */
static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
{
int i,j,max;
const BN_ULONG *ap;
BN_ULONG *rp;
max=n*2;
ap=a;
rp=r;
rp[0]=rp[max-1]=0;
rp++;
j=n;
if (--j > 0)
{
ap++;
rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
rp+=2;
}
for (i=n-2; i>0; i--)
{
j--;
ap++;
rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
rp+=2;
}
bn_add_words(r,r,r,max);
/* There will not be a carry */
bn_sqr_words(tmp,a,n);
bn_add_words(r,r,tmp,max);
}
#ifdef BN_RECURSION
/* r is 2*n words in size,
* a and b are both n words in size. (There's not actually a 'b' here ...)
* n must be a power of 2.
* We multiply and return the result.
* t must be 2*n words in size
* We calculate
* a[0]*b[0]
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
* a[1]*b[1]
*/
static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
{
int n=n2/2;
int zero,c1;
BN_ULONG ln,lo,*p;
if (n2 == 4)
{
#ifndef BN_SQR_COMBA
bn_sqr_normal(r,a,4,t);
#else
bn_sqr_comba4(r,a);
#endif
return;
}
else if (n2 == 8)
{
#ifndef BN_SQR_COMBA
bn_sqr_normal(r,a,8,t);
#else
bn_sqr_comba8(r,a);
#endif
return;
}
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
{
bn_sqr_normal(r,a,n2,t);
return;
}
/* r=(a[0]-a[1])*(a[1]-a[0]) */
c1=bn_cmp_words(a,&(a[n]),n);
zero=0;
if (c1 > 0)
bn_sub_words(t,a,&(a[n]),n);
else if (c1 < 0)
bn_sub_words(t,&(a[n]),a,n);
else
zero=1;
/* The result will always be negative unless it is zero */
p= &(t[n2*2]);
if (!zero)
bn_sqr_recursive(&(t[n2]),t,n,p);
else
memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
bn_sqr_recursive(r,a,n,p);
bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
*/
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
/* t[32] is negative */
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
* r[10] holds (a[0]*a[0])
* r[32] holds (a[1]*a[1])
* c1 holds the carry bits
*/
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
if (c1)
{
p= &(r[n+n2]);
lo= *p;
ln=(lo+c1)&BN_MASK2;
*p=ln;
/* The overflow will stop before we over write
* words we should not overwrite */
if (ln < (BN_ULONG)c1)
{
do {
p++;
lo= *p;
ln=(lo+1)&BN_MASK2;
*p=ln;
} while (ln == 0);
}
}
}
#endif /*BN_RECURSION */
/* unsigned subtraction of b from a, a must be larger than b. */
static int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max,min,dif;
register BN_ULONG t1,t2,*ap,*bp,*rp;
int i,carry;
max = a->top;
min = b->top;
dif = max - min;
if (dif < 0) /* hmm... should not be happening */
{
return(0);
}
if (bn_wexpand(r,max) == NULL) return(0);
ap=a->d;
bp=b->d;
rp=r->d;
carry=0;
for (i = min; i != 0; i--)
{
t1= *(ap++);
t2= *(bp++);
if (carry)
{
carry=(t1 <= t2);
t1=(t1-t2-1)&BN_MASK2;
}
else
{
carry=(t1 < t2);
t1=(t1-t2)&BN_MASK2;
}
*(rp++)=t1&BN_MASK2;
}
if (carry) /* subtracted */
{
if (!dif)
/* error: a < b */
return 0;
while (dif)
{
dif--;
t1 = *(ap++);
t2 = (t1-1)&BN_MASK2;
*(rp++) = t2;
if (t1)
break;
}
}
if (rp != ap)
{
for (;;)
{
if (!dif--) break;
rp[0]=ap[0];
if (!dif--) break;
rp[1]=ap[1];
if (!dif--) break;
rp[2]=ap[2];
if (!dif--) break;
rp[3]=ap[3];
rp+=4;
ap+=4;
}
}
r->top=max;
bn_correct_top(r);
return(1);
}
static int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_POOL *ctx)
{
int retn=0;
BIGNUM *n,*r;
BN_ULONG *ap,*np,*rp,n0,v,*nrp;
int al,nl,max,i,x,ri;
if ((r = BN_POOL_get(ctx)) == NULL) goto err;
if (!BN_copy(r,a)) goto err;
n= &(mont->N);
ap=a->d;
/* mont->ri is the size of mont->N in bits (rounded up
to the word size) */
al=ri=mont->ri/BN_BITS2;
nl=n->top;
if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
max=(nl+al+1); /* allow for overflow (no?) XXX */
if (bn_wexpand(r,max) == NULL) goto err;
if (bn_wexpand(ret,max) == NULL) goto err;
np=n->d;
rp=r->d;
nrp= &(r->d[nl]);
/* clear the top words of T */
for (i=r->top; i<max; i++) /* memset? XXX */
r->d[i]=0;
r->top=max;
n0=mont->n0;
for (i=0; i<nl; i++)
{
v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
nrp++;
rp++;
if (((nrp[-1]+=v)&BN_MASK2) >= v)
continue;
else
{
if (((++nrp[0])&BN_MASK2) != 0) continue;
if (((++nrp[1])&BN_MASK2) != 0) continue;
for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
}
}
bn_correct_top(r);
/* mont->ri will be a multiple of the word size */
x=ri;
rp=ret->d;
ap= &(r->d[x]);
if (r->top < x)
al=0;
else
al=r->top-x;
ret->top=al;
al-=4;
for (i=0; i<al; i+=4)
{
BN_ULONG t1,t2,t3,t4;
t1=ap[i+0];
t2=ap[i+1];
t3=ap[i+2];
t4=ap[i+3];
rp[i+0]=t1;
rp[i+1]=t2;
rp[i+2]=t3;
rp[i+3]=t4;
}
al+=4;
for (; i<al; i++)
rp[i]=ap[i];
if (BN_ucmp(ret, &(mont->N)) >= 0)
{
if (!BN_usub(ret,ret,&(mont->N))) goto err;
}
retn=1;
err:
BN_POOL_release(ctx,1);
return(retn);
}
/*SQR*/
/* r must not be a */
/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
static int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_POOL *ctx)
{
int release=0;
int max,al;
int ret = 0;
BIGNUM *tmp,*rr;
al=a->top;
if (al <= 0)
{
r->top=0;
return 1;
}
if(a != r)
rr=r;
else
{rr=BN_POOL_get(ctx);release++;}
tmp=BN_POOL_get(ctx);
release++;
if (!rr || !tmp) goto err;
max = 2 * al; /* Non-zero (from above) */
if (bn_wexpand(rr,max) == NULL) goto err;
if (al == 4)
{
#ifndef BN_SQR_COMBA
BN_ULONG t[8];
bn_sqr_normal(rr->d,a->d,4,t);
#else
bn_sqr_comba4(rr->d,a->d);
#endif
}
else if (al == 8)
{
#ifndef BN_SQR_COMBA
BN_ULONG t[16];
bn_sqr_normal(rr->d,a->d,8,t);
#else
bn_sqr_comba8(rr->d,a->d);
#endif
}
else
{
#if defined(BN_RECURSION)
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
{
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
bn_sqr_normal(rr->d,a->d,al,t);
}
else
{
int j,k;
j=BN_num_bits_word((BN_ULONG)al);
j=1<<(j-1);
k=j+j;
if (al == j)
{
if (bn_wexpand(tmp,k*2) == NULL) goto err;
bn_sqr_recursive(rr->d,a->d,al,tmp->d);
}
else
{
if (bn_wexpand(tmp,max) == NULL) goto err;
bn_sqr_normal(rr->d,a->d,al,tmp->d);
}
}
#else
if (bn_wexpand(tmp,max) == NULL) goto err;
bn_sqr_normal(rr->d,a->d,al,tmp->d);
#endif
}
/* If the most-significant half of the top word of 'a' is zero, then
* the square of 'a' will max-1 words. */
if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
rr->top = max - 1;
else
rr->top = max;
if (rr != r) BN_copy(r,rr);
ret = 1;
err:
BN_POOL_release(ctx,release);
return(ret);
}
static int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_POOL *ctx)
{
BIGNUM *tmp;
int ret=0;
tmp = BN_POOL_get(ctx);
if (tmp == NULL) goto err;
if (0 == BN_ucmp(a,b))
{
if (!BN_sqr(tmp,a,ctx)) goto err;
}
else
{
if (!BN_mul(tmp,a,b,ctx)) goto err;
}
/* reduce from aRR to aR */
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
ret=1;
err:
BN_POOL_release(ctx,1);
return(ret);
}
/* unsigned add of b to a */
static int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max,min,dif;
BN_ULONG *ap,*bp,*rp,carry,t1,t2;
const BIGNUM *tmp;
if (a->top < b->top)
{ tmp=a; a=b; b=tmp; }
max = a->top;
min = b->top;
dif = max - min;
if (bn_wexpand(r,max+1) == NULL)
return 0;
r->top=max;
ap=a->d;
bp=b->d;
rp=r->d;
carry=bn_add_words(rp,ap,bp,min);
rp+=min;
ap+=min;
bp+=min;
if (carry)
{
while (dif)
{
dif--;
t1 = *(ap++);
t2 = (t1+1) & BN_MASK2;
*(rp++) = t2;
if (t2)
{
carry=0;
break;
}
}
if (carry)
{
/* carry != 0 => dif == 0 */
*rp = 1;
r->top++;
}
}
if (dif && rp != ap)
while (dif--)
/* copy remaining words if ap != rp */
*(rp++) = *(ap++);
return 1;
}
static int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max;
/* We are actually doing a - b :-) */
max=(a->top > b->top)?a->top:b->top;
if (bn_wexpand(r,max) == NULL) return(0);
if (BN_ucmp(a,b) < 0)
{
if (!BN_usub(r,b,a)) return(0);
DHPRINT("----\n");
}
else
{
if (!BN_usub(r,a,b)) return(0);
}
return(1);
}
static int BN_mul_word(BIGNUM *a, BN_ULONG w)
{
BN_ULONG ll;
w&=BN_MASK2;
if (a->top)
{
if (w == 0)
BN_zero(a);
else
{
ll=bn_mul_words(a->d,a->d,a->top,w);
if (ll)
{
if (bn_wexpand(a,a->top+1) == NULL) return(0);
a->d[a->top++]=ll;
}
}
}
return(1);
}
/* solves ax == 1 (mod n) */
static BIGNUM *BN_mod_inverse(BIGNUM *in,
const BIGNUM *a, const BIGNUM *n, BN_POOL *ctx)
{
BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
BIGNUM *ret=NULL;
int sign;
A = BN_POOL_get(ctx);
B = BN_POOL_get(ctx);
X = BN_POOL_get(ctx);
D = BN_POOL_get(ctx);
M = BN_POOL_get(ctx);
Y = BN_POOL_get(ctx);
T = BN_POOL_get(ctx);
if (T == NULL) goto err;
if (in == NULL)
R=BN_new();
else
R=in;
if (R == NULL) goto err;
BN_one(X);
BN_zero(Y);
if (BN_copy(B,a) == NULL) goto err;
if (BN_copy(A,n) == NULL) goto err;
if (BN_ucmp(B, A) >= 0)
{
if (!(BN_mod(B, B, A, ctx))) goto err;
}
sign = -1;
/* From B = a mod |n|, A = |n| it follows that
*
* 0 <= B < A,
* -sign*X*a == B (mod |n|),
* sign*Y*a == A (mod |n|).
*/
if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
{
/* Binary inversion algorithm; requires odd modulus.
* This is faster than the general algorithm if the modulus
* is sufficiently small (about 400 .. 500 bits on 32-bit
* sytems, but much more on 64-bit systems) */
int shift;
while (!BN_is_zero(B))
{
/*
* 0 < B < |n|,
* 0 < A <= |n|,
* (1) -sign*X*a == B (mod |n|),
* (2) sign*Y*a == A (mod |n|)
*/
/* Now divide B by the maximum possible power of two in the integers,
* and divide X by the same value mod |n|.
* When we're done, (1) still holds. */
shift = 0;
while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
{
shift++;
if (BN_is_odd(X))
{
if (!BN_uadd(X, X, n)) goto err;
}
/* now X is even, so we can easily divide it by two */
if (!BN_rshift1(X, X)) goto err;
}
if (shift > 0)
{
if (!BN_rshift(B, B, shift)) goto err;
}
/* Same for A and Y. Afterwards, (2) still holds. */
shift = 0;
while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
{
shift++;
if (BN_is_odd(Y))
{
if (!BN_uadd(Y, Y, n)) goto err;
}
/* now Y is even */
if (!BN_rshift1(Y, Y)) goto err;
}
if (shift > 0)
{
if (!BN_rshift(A, A, shift)) goto err;
}
/* We still have (1) and (2).
* Both A and B are odd.
* The following computations ensure that
*
* 0 <= B < |n|,
* 0 < A < |n|,
* (1) -sign*X*a == B (mod |n|),
* (2) sign*Y*a == A (mod |n|),
*
* and that either A or B is even in the next iteration.
*/
if (BN_ucmp(B, A) >= 0)
{
/* -sign*(X + Y)*a == B - A (mod |n|) */
if (!BN_uadd(X, X, Y)) goto err;
/* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
* actually makes the algorithm slower */
if (!BN_usub(B, B, A)) goto err;
}
else
{
/* sign*(X + Y)*a == A - B (mod |n|) */
if (!BN_uadd(Y, Y, X)) goto err;
/* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
if (!BN_usub(A, A, B)) goto err;
}
}
}
else
goto err;
/*
* The while loop (Euclid's algorithm) ends when
* A == gcd(a,n);
* we have
* sign*Y*a == A (mod |n|),
* where Y is non-negative.
*/
if (sign < 0)
{
if (!BN_sub(Y,n,Y)) goto err;
}
/* Now Y*a == A (mod |n|). */
if (BN_is_one(A))
{
/* Y*a == 1 (mod |n|) */
if (BN_ucmp(Y,n) < 0)
{
if (!BN_copy(R,Y)) goto err;
}
else
{
if (!(BN_mod(R, Y, n, ctx))) goto err;
}
}
else
{
goto err;
}
ret=R;
err:
if ((ret == NULL) && (in == NULL)) BN_free(R);
BN_POOL_release(ctx,7);
return(ret);
}
static int BN_sub_word(BIGNUM *a, BN_ULONG w)
{
int i;
w &= BN_MASK2;
/* degenerate case: w is zero */
if (!w) return 1;
/* degenerate case: a is zero */
if(BN_is_zero(a))
{
i = BN_set_word(a,w);
if (i != 0)
{
DHPRINT("BN_set_negative\n");
/*BN_set_negative(a, 1); */
}
return i;
}
if ((a->top == 1) && (a->d[0] < w))
{
a->d[0]=w-a->d[0];
DHPRINT("--------\n");
return(1);
}
i=0;
for (;;)
{
if (a->d[i] >= w)
{
a->d[i]-=w;
break;
}
else
{
a->d[i]=(a->d[i]-w)&BN_MASK2;
i++;
w=1;
}
}
if ((a->d[i] == 0) && (i == (a->top-1)))
a->top--;
return(1);
}
static int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_POOL *ctx)
{
int ret = 0;
BIGNUM *Ri,*R;
BIGNUM tmod;
BN_ULONG buf[2];
if((Ri = BN_POOL_get(ctx)) == NULL) goto err;
R= &(mont->RR); /* grab RR as a temp */
if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
/*start*/
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
BN_zero(R);
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0;
tmod.d=buf;
tmod.top = buf[0] != 0 ? 1 : 0;
tmod.dmax=2;
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */
if (!BN_is_zero(Ri))
{
if (!BN_sub_word(Ri,1)) goto err;
}
else /* if N mod word size == 1 */
{
if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
}
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only least significant word: */
mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
/*end*/
/* setup RR for conversions */
BN_zero(&(mont->RR));
if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
ret = 1;
err:
return ret;
}
static const EVP_MD sha1_md=
{
NID_sha1,
SHA_DIGEST_LENGTH,
0,
RAND_init,
RAND_update,
RAND_final,
sizeof(EVP_MD *)+sizeof(OPEN_SHA_CTX),
};
/* This call frees resources associated with the context */
static int EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
{
/* Don't assume ctx->md_data was cleaned in EVP_Digest_Final,
* because sometimes only copies of the context are ever finalised.
*/
if (ctx->digest && ctx->digest->ctx_size && ctx->md_data)
{
os_free_mem(NULL, ctx->md_data);
}
memset(ctx,'\0',sizeof *ctx);
return 1;
}
/* The caller can assume that this removes any secret data from the context */
static int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md, unsigned int *size)
{
int ret;
ret=ctx->digest->final(ctx,md);
if (size != NULL)
*size=ctx->digest->md_size;
memset(ctx->md_data,0,ctx->digest->ctx_size);
return ret;
}
static void EVP_MD_CTX_init(EVP_MD_CTX *ctx)
{
memset(ctx,'\0',sizeof *ctx);
}
static int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type)
{
if (ctx->digest != type)
{
if (ctx->digest && ctx->digest->ctx_size)
os_free_mem(NULL, ctx->md_data);
ctx->digest=type;
if (type->ctx_size)
os_alloc_mem(NULL, (UCHAR **)&(ctx->md_data), type->ctx_size);
}
return ctx->digest->init(ctx);
}
static int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *data,
size_t count)
{
return ctx->digest->update(ctx,data,count);
}
static void ssleay_rand_add(const void *buf, int num, long add)
{
int i,j,k,st_idx;
long md_c[2];
unsigned char local_md[SHA_DIGEST_LENGTH];
EVP_MD_CTX m;
/*
* (Based on the rand(3) manpage)
*
* The input is chopped up into units of 20 bytes (or less for
* the last block). Each of these blocks is run through the hash
* function as follows: The data passed to the hash function
* is the current 'md', the same number of bytes from the 'state'
* (the location determined by in incremented looping index) as
* the current 'block', the new key data 'block', and 'count'
* (which is incremented after each use).
* The result of this is kept in 'md' and also xored into the
* 'state' at the same locations that were used as input into the
* hash function.
*/
st_idx=g_state_index;
/* use our own copies of the counters so that even
* if a concurrent thread seeds with exactly the
* same data and uses the same subarray there's _some_
* difference */
md_c[0] = g_md_count[0];
md_c[1] = g_md_count[1];
memcpy(local_md, g_md, sizeof g_md);
/* g_state_index <= g_state_num <= STATE_SIZE */
g_state_index += num;
if (g_state_index >= STATE_SIZE)
{
g_state_index%=STATE_SIZE;
g_state_num=STATE_SIZE;
}
else if (g_state_num < STATE_SIZE)
{
if (g_state_index > g_state_num)
g_state_num=g_state_index;
}
/* g_state_index <= g_state_num <= STATE_SIZE */
/* g_state[st_idx], ..., g_state[(st_idx + num - 1) % STATE_SIZE]
* are what we will use now, but other threads may use them
* as well */
g_md_count[1] += (num / SHA_DIGEST_LENGTH) + (num % SHA_DIGEST_LENGTH > 0);
EVP_MD_CTX_init(&m);
for (i=0; i<num; i+=SHA_DIGEST_LENGTH)
{
j=(num-i);
j=(j > SHA_DIGEST_LENGTH)?SHA_DIGEST_LENGTH:j;
EVP_DigestInit_ex(&m,&sha1_md);
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
k=(st_idx+j)-STATE_SIZE;
if (k > 0)
{
EVP_DigestUpdate(&m,&(g_state[st_idx]),j-k);
EVP_DigestUpdate(&m,&(g_state[0]),k);
}
else
EVP_DigestUpdate(&m,&(g_state[st_idx]),j);
EVP_DigestUpdate(&m,buf,j);
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
EVP_DigestFinal_ex(&m,local_md,NULL);
md_c[1]++;
buf=(const char *)buf + j;
for (k=0; k<j; k++)
{
/* Parallel threads may interfere with this,
* but always each byte of the new state is
* the XOR of some previous value of its
* and local_md (itermediate values may be lost).
* Alway using locking could hurt performance more
* than necessary given that conflicts occur only
* when the total seeding is longer than the random
* state. */
g_state[st_idx++]^=local_md[k];
if (st_idx >= STATE_SIZE)
st_idx=0;
}
}
EVP_MD_CTX_cleanup(&m);
/* Don't just copy back local_md into md -- this could mean that
* other thread's seeding remains without effect (except for
* the incremented counter). By XORing it we keep at least as
* much entropy as fits into md. */
for (k = 0; k < (int)sizeof(g_md); k++)
{
g_md[k] ^= local_md[k];
}
if (g_entropy < ENTROPY_NEEDED) /* stop counting when we have enough */
g_entropy += add;
}
static int ssleay_rand_bytes(unsigned char *buf, int num)
{
static volatile int stirred_pool = 0;
int i,j,k,st_num,st_idx;
int num_ceil;
int ok;
long md_c[2];
unsigned char local_md[SHA_DIGEST_LENGTH];
EVP_MD_CTX m;
#ifndef GETPID_IS_MEANINGLESS
pid_t curr_pid = getpid();
#endif
int do_stir_pool = 0;
if (num <= 0)
return 1;
EVP_MD_CTX_init(&m);
/* round upwards to multiple of SHA_DIGEST_LENGTH/2 */
num_ceil = (1 + (num-1)/(SHA_DIGEST_LENGTH/2)) * (SHA_DIGEST_LENGTH/2);
/*
* (Based on the rand(3) manpage:)
*
* For each group of 10 bytes (or less), we do the following:
*
* Input into the hash function the local 'md' (which is RAND_initialized from
* the global 'md' before any bytes are generated), the bytes that are to
* be overwritten by the random bytes, and bytes from the 'state'
* (incrementing looping index). From this digest output (which is kept
* in 'md'), the top (up to) 10 bytes are returned to the caller and the
* bottom 10 bytes are xored into the 'state'.
*
* Finally, after we have finished 'num' random bytes for the
* caller, 'count' (which is incremented) and the local and global 'md'
* are fed into the hash function and the results are kept in the
* global 'md'.
*/
if (!RAND_initialized)
{
RAND_poll();
RAND_initialized = 1;
}
if (!stirred_pool)
do_stir_pool = 1;
ok = (g_entropy >= ENTROPY_NEEDED);
if (!ok)
{
/* If the PRNG state is not yet unpredictable, then seeing
* the PRNG output may help attackers to determine the new
* state; thus we have to decrease the entropy estimate.
* Once we've had enough initial seeding we don't bother to
* adjust the entropy count, though, because we're not ambitious
* to provide *information-theoretic* randomness.
*
* NOTE: This approach fails if the program forks before
* we have enough entropy. Entropy should be collected
* in a separate input pool and be transferred to the
* output pool only when the entropy limit has been reached.
*/
g_entropy -= num;
if (g_entropy < 0)
g_entropy = 0;
}
if (do_stir_pool)
{
/* In the output function only half of 'md' remains secret,
* so we better make sure that the required entropy gets
* 'evenly distributed' through 'state', our randomness pool.
* The input function (ssleay_rand_add) chains all of 'md',
* which makes it more suitable for this purpose.
*/
int n = STATE_SIZE; /* so that the complete pool gets accessed */
while (n > 0)
{
/* Note that the seed does not matter, it's just that
* ssleay_rand_add expects to have something to hash. */
/* at least SHA_DIGEST_LENGTH "...................." */
ssleay_rand_add("....................", SHA_DIGEST_LENGTH, 0);
n -= SHA_DIGEST_LENGTH;
}
if (ok)
stirred_pool = 1;
}
st_idx=g_state_index;
st_num=g_state_num;
md_c[0] = g_md_count[0];
md_c[1] = g_md_count[1];
memcpy(local_md, g_md, sizeof g_md);
g_state_index+=num_ceil;
if (g_state_index > g_state_num)
g_state_index %= g_state_num;
/* g_state[st_idx], ..., g_state[(st_idx + num_ceil - 1) % st_num]
* are now ours (but other threads may use them too) */
g_md_count[0] += 1;
while (num > 0)
{
/* num_ceil -= SHA_DIGEST_LENGTH/2 */
j=(num >= SHA_DIGEST_LENGTH/2)?SHA_DIGEST_LENGTH/2:num;
num-=j;
EVP_DigestInit_ex(&m,&sha1_md);
#ifndef GETPID_IS_MEANINGLESS
if (curr_pid) /* just in the first iteration to save time */
{
EVP_DigestUpdate(&m,(unsigned char*)&curr_pid,sizeof curr_pid);
curr_pid = 0;
}
#endif
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
#ifndef PURIFY
EVP_DigestUpdate(&m,buf,j); /* purify complains */
#endif
k=(st_idx+SHA_DIGEST_LENGTH/2)-st_num;
if (k > 0)
{
EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2-k);
EVP_DigestUpdate(&m,&(g_state[0]),k);
}
else
EVP_DigestUpdate(&m,&(g_state[st_idx]),SHA_DIGEST_LENGTH/2);
EVP_DigestFinal_ex(&m,local_md,NULL);
for (i=0; i<SHA_DIGEST_LENGTH/2; i++)
{
g_state[st_idx++]^=local_md[i]; /* may compete with other threads */
if (st_idx >= st_num)
st_idx=0;
if (i < j)
*(buf++)=local_md[i+SHA_DIGEST_LENGTH/2];
}
}
EVP_DigestInit_ex(&m,&sha1_md);
EVP_DigestUpdate(&m,(unsigned char *)&(md_c[0]),sizeof(md_c));
EVP_DigestUpdate(&m,local_md,SHA_DIGEST_LENGTH);
EVP_DigestUpdate(&m,g_md,SHA_DIGEST_LENGTH);
EVP_DigestFinal_ex(&m,g_md,NULL);
EVP_MD_CTX_cleanup(&m);
if (ok)
return(1);
else
{
return(0);
}
}
static int RAND_poll(void)
{
unsigned long l=100;
unsigned char tmpbuf[ENTROPY_NEEDED];
int n = 0;
n=ENTROPY_NEEDED;
if (n > 0)
{
ssleay_rand_add(tmpbuf,sizeof tmpbuf,(long)n);
}
/* put in some default random data, we need more than just this */
l=100;/*curr_pid; */
ssleay_rand_add(&l,sizeof(l),0);
l=200;/*getuid(); */
ssleay_rand_add(&l,sizeof(l),0);
#ifdef USERSPACE
l=time(NULL);
#else
NdisGetSystemUpTime(&l);
#endif /*USERSPACE */
ssleay_rand_add(&l,sizeof(l),0);
return 0;
}
static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
{
unsigned char *buf=NULL;
int ret=0,bit,bytes,mask;
time_t tim;
if (bits == 0)
{
BN_zero(rnd);
return 1;
}
bytes=(bits+7)/8;
bit=(bits-1)%8;
mask=0xff<<(bit+1);
os_alloc_mem(NULL, (UCHAR **)&buf, bytes);
if (buf == NULL)
{
goto err;
}
/* make a random number and set the top and bottom bits */
#ifdef USERSPACE
time(&tim);
#else
NdisGetSystemUpTime((ULONG *)(&tim));
#endif /*USERSPACE */
ssleay_rand_add(&tim,sizeof(tim),0);
if (pseudorand)
{
if (ssleay_rand_bytes(buf, bytes) == -1)
goto err;
}
else
{
if (ssleay_rand_bytes(buf, bytes) <= 0)
goto err;
}
if (pseudorand == 2)
{
/* generate patterns that are more likely to trigger BN
library bugs */
int i;
unsigned char c;
for (i = 0; i < bytes; i++)
{
ssleay_rand_bytes(&c, 1);
if (c >= 128 && i > 0)
buf[i] = buf[i-1];
else if (c < 42)
buf[i] = 0;
else if (c < 84)
buf[i] = 255;
}
}
if (top != -1)
{
if (top)
{
if (bit == 0)
{
buf[0]=1;
buf[1]|=0x80;
}
else
{
buf[0]|=(3<<(bit-1));
}
}
else
{
buf[0]|=(1<<bit);
}
}
buf[0] &= ~mask;
if (bottom) /* set bottom bit if requested */
buf[bytes-1]|=1;
if (!BN_bin2bn(buf,bytes,rnd)) goto err;
ret=1;
err:
if (buf != NULL)
{
os_free_mem(NULL, buf);
}
return(ret);
}
static BN_MONT_CTX *BN_MONT_CTX_new(void)
{
BN_MONT_CTX *ret;
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(BN_MONT_CTX));
if (ret == NULL)
return(NULL);
ret->ri=0;
BN_init(&(ret->RR));
BN_init(&(ret->N));
ret->flags=BN_FLG_MALLOCED;
return(ret);
}
static void BN_CTX_free(BN_POOL *ctx)
{
if (ctx == NULL)
return;
BN_POOL_finish(ctx);
os_free_mem(NULL, ctx);
}
static void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
if(mont == NULL)
return;
BN_free(&(mont->RR));
BN_free(&(mont->N));
if (mont->flags & BN_FLG_MALLOCED)
os_free_mem(NULL, mont);
}
#define BN_MOD_MUL_WORD(r, w, m) (BN_mul_word(r, (w)) && ((BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
static int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
{
BN_MONT_CTX *mont = NULL;
int b, bits, ret=0;
int r_is_one;
BN_ULONG w, next_w;
BIGNUM *d, *r, *t;
BIGNUM *swap_tmp;
if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
{
/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
return -1;
}
if (!BN_is_odd(m))
{
return(0);
}
if (m->top == 1)
a %= m->d[0]; /* make sure that 'a' is reduced */
bits = BN_num_bits(p);
if (bits == 0)
{
ret = BN_one(rr);
return ret;
}
if (a == 0)
{
BN_zero(rr);
ret = 1;
return ret;
}
d = BN_POOL_get(ctx);
r = BN_POOL_get(ctx);
t = BN_POOL_get(ctx);
if (d == NULL || r == NULL || t == NULL) goto err;
if (in_mont != NULL)
mont=in_mont;
else
{
if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
}
r_is_one = 1; /* except for Montgomery factor */
/* bits-1 >= 0 */
/* The result is accumulated in the product r*w. */
w = a; /* bit 'bits-1' of 'p' is always set */
for (b = bits-2; b >= 0; b--)
{
/* First, square r*w. */
next_w = w*w;
if ((next_w/w) != w) /* overflow */
{
if (r_is_one)
{
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
r_is_one = 0;
}
else
{
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
}
next_w = 1;
}
w = next_w;
if (!r_is_one)
{
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
}
/* Second, multiply r*w by 'a' if exponent bit is set. */
if (BN_is_bit_set(p, b))
{
next_w = w*a;
if ((next_w/a) != w) /* overflow */
{
if (r_is_one)
{
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
r_is_one = 0;
}
else
{
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
}
next_w = a;
}
w = next_w;
}
}
/* Finally, set r:=r*w. */
if (w != 1)
{
if (r_is_one)
{
if (!(BN_set_word(r, (w)) && BN_mod_mul_montgomery((r),(r),&((mont)->RR),(mont),(ctx)))) goto err;
r_is_one = 0;
}
else
{
if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
}
}
if (r_is_one) /* can happen only if a == 1*/
{
if (!BN_one(rr)) goto err;
}
else
{
if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
}
ret = 1;
err:
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
BN_POOL_release(ctx,3);
return(ret);
}
/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
* so that accessing any of these table values shows the same access pattern as far
* as cache lines are concerned. The following functions are used to transfer a BIGNUM
* from/to that table. */
static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
{
size_t i, j;
if (bn_wexpand(b, top) == NULL)
return 0;
while (b->top < top)
{
b->d[b->top++] = 0;
}
for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
{
buf[j] = ((unsigned char*)b->d)[i];
}
bn_correct_top(b);
return 1;
}
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
{
size_t i, j;
if (bn_wexpand(b, top) == NULL)
return 0;
for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
{
((unsigned char*)b->d)[i] = buf[j];
}
b->top = top;
bn_correct_top(b);
return 1;
}
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
* precomputation memory layout to limit data-dependency to a minimum
* to protect secret exponents (cf. the hyper-threading timing attacks
* pointed out by Colin Percival,
* http://www.daemonology.net/hyperthreading-considered-harmful
*/
static int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
{
int i,bits,ret=0,idx,window,wvalue;
int top;
BIGNUM *r;
const BIGNUM *aa;
BN_MONT_CTX *mont=NULL;
int numPowers;
unsigned char *powerbufFree=NULL;
int powerbufLen = 0;
unsigned char *powerbuf=NULL;
BIGNUM *computeTemp=NULL, *am=NULL;
top = m->top;
if (!(m->d[0] & 1))
{
return(0);
}
bits=BN_num_bits(p);
if (bits == 0)
{
ret = BN_one(rr);
return ret;
}
r = BN_POOL_get(ctx);
if (r == NULL) goto err;
/* Allocate a montgomery context if it was not supplied by the caller.
* If this is not done, things will break in the montgomery part.
*/
if (in_mont != NULL)
mont=in_mont;
else
{
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
}
/* Get the window size to use with size of p. */
window = BN_window_bits_for_ctime_exponent_size(bits);
/* Allocate a buffer large enough to hold all of the pre-computed
* powers of a.
*/
numPowers = 1 << window;
powerbufLen = sizeof(m->d[0])*top*numPowers;
os_alloc_mem(NULL, (UCHAR **)&powerbufFree, powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
if (powerbufFree == NULL)
goto err;
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
memset(powerbuf, 0, powerbufLen);
/* Initialize the intermediate result. Do this early to save double conversion,
* once each for a^0 and intermediate result.
*/
if (!BN_mod_mul_montgomery((r),(BN_value_one()),&((mont)->RR),(mont),(ctx))) goto err;
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
/* Initialize computeTemp as a^1 with montgomery precalcs */
computeTemp = BN_POOL_get(ctx);
am = BN_POOL_get(ctx);
if (computeTemp==NULL || am==NULL) goto err;
if (BN_ucmp(a,m) >= 0)
{
if (!BN_mod(am,a,m,ctx))
goto err;
aa= am;
}
else
aa=a;
if (!BN_mod_mul_montgomery((am),(aa),&((mont)->RR),(mont),(ctx))) goto err;
if (!BN_copy(computeTemp, am)) goto err;
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
/* If the window size is greater than 1, then calculate
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
* (even powers could instead be computed as (a^(i/2))^2
* to use the slight performance advantage of sqr over mul).
*/
if (window > 1)
{
for (i=2; i<numPowers; i++)
{
/* Calculate a^i = a^(i-1) * a */
if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx)) goto err;
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
}
}
/* Adjust the number of bits up to a multiple of the window size.
* If the exponent length is not a multiple of the window size, then
* this pads the most significant bits with zeros to normalize the
* scanning loop to there's no special cases.
*
* * NOTE: Making the window size a power of two less than the native
* * word size ensures that the padded bits won't go past the last
* * word in the internal BIGNUM structure. Going past the end will
* * still produce the correct result, but causes a different branch
* * to be taken in the BN_is_bit_set function.
*/
bits = ((bits+window-1)/window)*window;
idx=bits-1; /* The top bit of the window */
/* Scan the exponent one window at a time starting from the most
* significant bits.
*/
while (idx >= 0)
{
wvalue=0; /* The 'value' of the window */
/* Scan the window, squaring the result as we go */
for (i=0; i<window; i++,idx--)
{
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
}
/* Fetch the appropriate pre-computed value from the pre-buf */
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
/* Multiply the result into the intermediate result */
if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
}
/* Convert the final result from montgomery to standard format */
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
ret=1;
err:
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
if (powerbuf!=NULL)
{
os_free_mem(NULL, powerbufFree);
}
if (am!=NULL) BN_clear(am);
if (computeTemp!=NULL) BN_clear(computeTemp);
BN_POOL_release(ctx,3);
return(ret);
}
static int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_POOL *ctx, BN_MONT_CTX *in_mont)
{
int release=0;
int i,j,bits,ret=0,wstart,wend,window,wvalue;
int start=1;
BIGNUM *d,*r;
const BIGNUM *aa;
/* Table of variables obtained from 'ctx' */
BIGNUM *val[EXP_TABLE_SIZE];
BN_MONT_CTX *mont=NULL;
if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
{
return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
}
if (!BN_is_odd(m))
{
return(0);
}
bits=BN_num_bits(p);
if (bits == 0)
{
ret = BN_one(rr);
return ret;
}
d = BN_POOL_get(ctx);
r = BN_POOL_get(ctx);
val[0] = BN_POOL_get(ctx);
release=3;
if (!d || !r || !val[0]) goto err;
/* If this is not done, things will break in the montgomery
* part */
if (in_mont != NULL)
mont=in_mont;
else
{
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
}
if (BN_ucmp(a,m) >= 0)
{
if (!(BN_mod(val[0], a, m, ctx))) goto err;
aa= val[0];
}
else
aa=a;
if (BN_is_zero(aa))
{
BN_zero(rr);
ret = 1;
goto err;
}
if (!BN_mod_mul_montgomery((val[0]),(aa),&((mont)->RR),(mont),(ctx))) goto err; /* 1 */
window = BN_window_bits_for_exponent_size(bits);
if (window > 1)
{
if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
j=1<<(window-1);
for (i=1; i<j; i++)
{
if(((val[i] = BN_POOL_get(ctx)) == NULL) ||
!BN_mod_mul_montgomery(val[i],val[i-1],
d,mont,ctx))
goto err;
release++;
}
}
start=1; /* This is used to avoid multiplication etc
* when there is only the value '1' in the
* buffer. */
wvalue=0; /* The 'value' of the window */
wstart=bits-1; /* The top bit of the window */
wend=0; /* The bottom bit of the window */
if (!BN_mod_mul_montgomery((r),(BN_value_one()),&((mont)->RR),(mont),(ctx))) goto err;
for (;;)
{
if (BN_is_bit_set(p,wstart) == 0)
{
if (!start)
{
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
goto err;
}
if (wstart == 0) break;
wstart--;
continue;
}
/* We now have wstart on a 'set' bit, we now need to work out
* how bit a window to do. To do this we need to scan
* forward until the last set bit before the end of the
* window */
j=wstart;
wvalue=1;
wend=0;
for (i=1; i<window; i++)
{
if (wstart-i < 0) break;
if (BN_is_bit_set(p,wstart-i))
{
wvalue<<=(i-wend);
wvalue|=1;
wend=i;
}
}
/* wend is the size of the current window */
j=wend+1;
/* add the 'bytes above' */
if (!start)
for (i=0; i<j; i++)
{
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
goto err;
}
/* wvalue will be an odd number < 2^window */
if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
goto err;
/* move the 'window' down further */
wstart-=wend+1;
wvalue=0;
start=0;
if (wstart < 0) break;
}
if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
ret=1;
err:
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
BN_POOL_release(ctx,release);
return(ret);
}
/*KERNEL MODULE TRACE*/
static unsigned char DH_P_VALUE[DH_KEY_LEN] =
{
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
static DH *DH_new_method(void)
{
DH *ret;
os_alloc_mem(NULL, (UCHAR **)&ret, sizeof(DH));
if (ret == NULL)
{
return(NULL);
}
ret->p=NULL;
ret->g=NULL;
ret->pub_key=NULL;
ret->priv_key=NULL;
ret->flags=0;
return(ret);
}
static int dh_bn_mod_exp(const DH *dh, BIGNUM *r,
const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_POOL *ctx,
BN_MONT_CTX *m_ctx)
{
/* If a is only one word long and constant time is false, use the faster
* exponenentiation function.
*/
if (a->top == 1 && ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) != 0))
{
BN_ULONG A = a->d[0];
return BN_mod_exp_mont_word(r,A,p,m,ctx,m_ctx);
}
else
return BN_mod_exp_mont(r,a,p,m,ctx,m_ctx);
}
static int generate_key(DH *dh)
{
int ok=0;
int generate_new_key=0;
unsigned lvar;
BN_POOL *ctx;
BN_MONT_CTX *mont=NULL;
BIGNUM *pub_key=NULL,*priv_key=NULL;
ctx = BN_CTX_new();
if (ctx == NULL) goto err;
if (dh->priv_key == NULL)
{
priv_key=BN_new();
if (priv_key == NULL) goto err;
generate_new_key=1;
}
else
priv_key=dh->priv_key;
if (dh->pub_key == NULL)
{
pub_key=BN_new();
if (pub_key == NULL) goto err;
}
else
pub_key=dh->pub_key;
if (generate_new_key)
{
lvar = BN_num_bits(dh->p)-1; /* secret exponent length */
if (!bnrand(0, priv_key, lvar, 0, 0)) goto err;
}
{
BIGNUM local_prk;
BIGNUM *prk;
if ((dh->flags & DH_FLAG_NO_EXP_CONSTTIME) == 0)
{
BN_init(&local_prk);
prk = &local_prk;
BN_with_flags(prk, priv_key, BN_FLG_EXP_CONSTTIME);
}
else
prk = priv_key;
if (!dh_bn_mod_exp(dh, pub_key, dh->g, prk, dh->p, ctx, mont)) goto err;
}
dh->pub_key=pub_key;
dh->priv_key=priv_key;
ok=1;
err:
if ((pub_key != NULL) && (dh->pub_key == NULL)) BN_free(pub_key);
if ((priv_key != NULL) && (dh->priv_key == NULL)) BN_free(priv_key);
BN_CTX_free(ctx);
return(ok);
}
static void DH_free(DH *r)
{
if(r == NULL) return;
if (r->p != NULL) BN_clear_free(r->p);
if (r->g != NULL) BN_clear_free(r->g);
if (r->pub_key != NULL) BN_clear_free(r->pub_key);
if (r->priv_key != NULL) BN_clear_free(r->priv_key);
os_free_mem(NULL, r);
}
static int compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh)
{
BN_POOL *ctx=NULL;
BN_MONT_CTX *mont=NULL;
BIGNUM *tmp;
int ret= -1;
if (BN_num_bits(dh->p) > OPENSSL_DH_MAX_MODULUS_BITS) goto err;
ctx = BN_CTX_new();
if (ctx == NULL) goto err;
tmp = BN_POOL_get(ctx);
if (dh->priv_key == NULL) goto err;
if (!dh_bn_mod_exp(dh, tmp, pub_key, dh->priv_key,dh->p,ctx,mont)) goto err;
ret=BN_bn2bin(tmp,key);
err:
BN_CTX_free(ctx);
return(ret);
}
static DH *DHSecret=NULL;
static DH *DHSecretR=NULL;
void GenerateDHPublicKey(unsigned char *ran_buf, int ran_len,
unsigned char *dhkey, int *dhkey_len)
{
unsigned int DH_G_VALUE = 2;
unsigned int g = htonl(DH_G_VALUE);
*dhkey_len = 0;
if(DHSecret == NULL)
{
DHSecret = DH_new_method();
if(NULL == DHSecret) goto err;
DHSecret->p = BN_new();
DHSecret->g = BN_new();
}
else if((DHSecret != NULL) && (DHSecretR == NULL))
{
DH_free(DHSecret);
DHSecret = NULL;
DHSecret = DH_new_method();
if(NULL == DHSecret) goto err;
DHSecret->p = BN_new();
DHSecret->g = BN_new();
}
else if((DHSecret != NULL) && (DHSecretR != NULL))
{
DHPRINT("reuse DHSecret\n");
}
BN_bin2bn(DH_P_VALUE, DH_KEY_LEN, DHSecret->p);
BN_bin2bn((const unsigned char *)&g, 4, DHSecret->g);
/*
if (!DH_check(DHSecret, &i)) goto err;
if (i & DH_CHECK_P_NOT_PRIME)
DHPRINT( "p value is not prime\n");
if (i & DH_CHECK_P_NOT_SAFE_PRIME)
DHPRINT( "p value is not a safe prime\n");
if (i & DH_UNABLE_TO_CHECK_GENERATOR)
DHPRINT( "unable to check the generator value\n");
if (i & DH_NOT_SUITABLE_GENERATOR)
DHPRINT( "the g value is not a generator\n");
*/
/*DHSecret->flags &= ~DH_FLAG_NO_EXP_CONSTTIME; */
DHSecret->flags |= DH_FLAG_NO_EXP_CONSTTIME;
if (!generate_key(DHSecret)) goto err;
BN_bn2bin(DHSecret->pub_key,dhkey);
*dhkey_len = BN_num_bytes(DHSecret->pub_key);
DHPRINT("GenerateDHPublicKey Success dhkey_len%d\n",*dhkey_len);
/* DHSecret will use and free on GenerateDHSecreteKey. NO free it */
return;
err:
DH_free(DHSecret);
DHSecret = NULL;
DHPRINT("GenerateDHPublicKey EXIT\n");
return;
}
/******* KDK generation ****** */
/*generate the DH shared secret */
void GenerateDHSecreteKey(unsigned char *ran_buf, int ran_len,
unsigned char *peer_dhkey, int peer_dhkey_len,
unsigned char *secrete_dhkey, int *secrete_dhkey_len)
{
BIGNUM DH_PubKey_Peer;
*secrete_dhkey_len = 0;
if(NULL == secrete_dhkey) goto err;
if(NULL == DHSecret) goto err;
DH_PubKey_Peer.flags=BN_FLG_MALLOCED;
DH_PubKey_Peer.top=0;
DH_PubKey_Peer.dmax=0;
DH_PubKey_Peer.d=NULL;
BN_bin2bn(peer_dhkey, peer_dhkey_len, &DH_PubKey_Peer);
*secrete_dhkey_len = compute_key(secrete_dhkey,&DH_PubKey_Peer,DHSecret);
if (DH_PubKey_Peer.d != NULL)
os_free_mem(NULL, DH_PubKey_Peer.d);
DHPRINT("GenerateDHSecreteKey Success\n");
err:
DH_free(DHSecret);
DHSecret = NULL;
DHPRINT("GenerateDHSecreteKey EXIT\n");
return;
}
void DH_freeall(void)
{
DH_free(DHSecret);
DHSecret = NULL;
return;
}
#else /* CRYPT_GPL_ALGORITHM */
/*
========================================================================
Routine Description:
Diffie-Hellman public key generation
Arguments:
GValue Array in UINT8
GValueLength The length of G in bytes
PValue Array in UINT8
PValueLength The length of P in bytes
PrivateKey Private key
PrivateKeyLength The length of Private key in bytes
Return Value:
PublicKey Public key
PublicKeyLength The length of public key in bytes
Note:
Reference to RFC2631
PublicKey = G^PrivateKey (mod P)
========================================================================
*/
void DH_PublicKey_Generate (
IN UINT8 GValue[],
IN UINT GValueLength,
IN UINT8 PValue[],
IN UINT PValueLength,
IN UINT8 PrivateKey[],
IN UINT PrivateKeyLength,
OUT UINT8 PublicKey[],
INOUT UINT *PublicKeyLength)
{
PBIG_INTEGER pBI_G = NULL;
PBIG_INTEGER pBI_P = NULL;
PBIG_INTEGER pBI_PrivateKey = NULL;
PBIG_INTEGER pBI_PublicKey = NULL;
/*
* 1. Check the input parameters
* - GValueLength, PValueLength and PrivateLength must be large than zero
* - PublicKeyLength must be large or equal than PValueLength
* - PValue must be odd
*
* - PValue must be prime number (no implement)
* - GValue must be greater than 0 but less than the PValue (no implement)
*/
if (GValueLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: G length is (%d)\n", GValueLength));
return;
} /* End of if */
if (PValueLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P length is (%d)\n", PValueLength));
return;
} /* End of if */
if (PrivateKeyLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: private key length is (%d)\n", PrivateKeyLength));
return;
} /* End of if */
if (*PublicKeyLength < PValueLength) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: public key length(%d) must be large or equal than P length(%d)\n",
*PublicKeyLength, PValueLength));
return;
} /* End of if */
if (!(PValue[PValueLength - 1] & 0x1)) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_PublicKey_Generate: P value must be odd\n"));
return;
} /* End of if */
/*
* 2. Transfer parameters to BigInteger structure
*/
BigInteger_Init(&pBI_G);
BigInteger_Init(&pBI_P);
BigInteger_Init(&pBI_PrivateKey);
BigInteger_Init(&pBI_PublicKey);
BigInteger_Bin2BI(GValue, GValueLength, &pBI_G);
BigInteger_Bin2BI(PValue, PValueLength, &pBI_P);
BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey);
/*
* 3. Calculate PublicKey = G^PrivateKey (mod P)
* - BigInteger Operation
* - Montgomery reduction
*/
BigInteger_Montgomery_ExpMod(pBI_G, pBI_PrivateKey, pBI_P, &pBI_PublicKey);
/*
* 4. Transfer BigInteger structure to char array
*/
BigInteger_BI2Bin(pBI_PublicKey, PublicKey, PublicKeyLength);
BigInteger_Free(&pBI_G);
BigInteger_Free(&pBI_P);
BigInteger_Free(&pBI_PrivateKey);
BigInteger_Free(&pBI_PublicKey);
} /* End of DH_PublicKey_Generate */
/*
========================================================================
Routine Description:
Diffie-Hellman secret key generation
Arguments:
PublicKey Public key
PublicKeyLength The length of Public key in bytes
PValue Array in UINT8
PValueLength The length of P in bytes
PrivateKey Private key
PrivateKeyLength The length of Private key in bytes
Return Value:
SecretKey Secret key
SecretKeyLength The length of secret key in bytes
Note:
Reference to RFC2631
SecretKey = PublicKey^PrivateKey (mod P)
========================================================================
*/
void DH_SecretKey_Generate (
IN UINT8 PublicKey[],
IN UINT PublicKeyLength,
IN UINT8 PValue[],
IN UINT PValueLength,
IN UINT8 PrivateKey[],
IN UINT PrivateKeyLength,
OUT UINT8 SecretKey[],
INOUT UINT *SecretKeyLength)
{
PBIG_INTEGER pBI_P = NULL;
PBIG_INTEGER pBI_SecretKey = NULL;
PBIG_INTEGER pBI_PrivateKey = NULL;
PBIG_INTEGER pBI_PublicKey = NULL;
/*
* 1. Check the input parameters
* - PublicKeyLength, PValueLength and PrivateLength must be large than zero
* - SecretKeyLength must be large or equal than PValueLength
* - PValue must be odd
*
* - PValue must be prime number (no implement)
*/
if (PublicKeyLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: public key length is (%d)\n", PublicKeyLength));
return;
} /* End of if */
if (PValueLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P length is (%d)\n", PValueLength));
return;
} /* End of if */
if (PrivateKeyLength == 0) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: private key length is (%d)\n", PrivateKeyLength));
return;
} /* End of if */
if (*SecretKeyLength < PValueLength) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: secret key length(%d) must be large or equal than P length(%d)\n",
*SecretKeyLength, PValueLength));
return;
} /* End of if */
if (!(PValue[PValueLength - 1] & 0x1)) {
DBGPRINT(RT_DEBUG_ERROR, ("DH_SecretKey_Generate: P value must be odd\n"));
return;
} /* End of if */
/*
* 2. Transfer parameters to BigInteger structure
*/
BigInteger_Init(&pBI_P);
BigInteger_Init(&pBI_PrivateKey);
BigInteger_Init(&pBI_PublicKey);
BigInteger_Init(&pBI_SecretKey);
BigInteger_Bin2BI(PublicKey, PublicKeyLength, &pBI_PublicKey);
BigInteger_Bin2BI(PValue, PValueLength, &pBI_P);
BigInteger_Bin2BI(PrivateKey, PrivateKeyLength, &pBI_PrivateKey);
/*
* 3. Calculate SecretKey = PublicKey^PrivateKey (mod P)
* - BigInteger Operation
* - Montgomery reduction
*/
BigInteger_Montgomery_ExpMod(pBI_PublicKey, pBI_PrivateKey, pBI_P, &pBI_SecretKey);
/*
* 4. Transfer BigInteger structure to char array
*/
BigInteger_BI2Bin(pBI_SecretKey, SecretKey, SecretKeyLength);
BigInteger_Free(&pBI_P);
BigInteger_Free(&pBI_PrivateKey);
BigInteger_Free(&pBI_PublicKey);
BigInteger_Free(&pBI_SecretKey);
} /* End of DH_SecretKey_Generate */
#endif /* CRYPT_GPL_ALGORITHM */