mirror of
https://github.com/clearml/dropbear
synced 2025-06-26 18:17:32 +00:00
Compare commits
5 Commits
LTM_DB_0.4
...
LTM_DB_0.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
52b6ae0d97 | ||
|
|
6ab9858dec | ||
|
|
0ac65f3f84 | ||
|
|
a89b26e778 | ||
|
|
180935d8cd |
40
Makefile.in
40
Makefile.in
@@ -2,6 +2,9 @@
|
||||
#
|
||||
#Tom St Denis
|
||||
|
||||
#version of library
|
||||
VERSION=0.35
|
||||
|
||||
VPATH=@srcdir@
|
||||
srcdir=@srcdir@
|
||||
|
||||
@@ -11,7 +14,7 @@ CFLAGS += -I$(srcdir)
|
||||
#CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare
|
||||
|
||||
#for speed
|
||||
#CFLAGS += -O3 -funroll-loops
|
||||
#CFLAGS += -O3 -funroll-all-loops
|
||||
|
||||
#for size
|
||||
#CFLAGS += -Os
|
||||
@@ -22,13 +25,15 @@ CFLAGS += -I$(srcdir)
|
||||
#debug
|
||||
#CFLAGS += -g3
|
||||
|
||||
VERSION=0.32
|
||||
#install as this user
|
||||
USER=root
|
||||
GROUP=root
|
||||
|
||||
default: libtommath.a
|
||||
|
||||
#default files to install
|
||||
LIBNAME=libtommath.a
|
||||
HEADERS=tommath.h
|
||||
HEADERS=tommath.h tommath_class.h tommath_superclass.h
|
||||
|
||||
#LIBPATH-The directory for libtommath to be installed to.
|
||||
#INCPATH-The directory to install the header files for libtommath.
|
||||
@@ -58,17 +63,18 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
|
||||
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
|
||||
|
||||
libtommath.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
|
||||
$(RANLIB) libtommath.a
|
||||
|
||||
|
||||
#make a profiled library (takes a while!!!)
|
||||
#
|
||||
# This will build the library with profile generation
|
||||
@@ -93,19 +99,19 @@ profiled_single:
|
||||
ranlib libtommath.a
|
||||
|
||||
install: libtommath.a
|
||||
install -d -g root -o root $(DESTDIR)$(LIBPATH)
|
||||
install -d -g root -o root $(DESTDIR)$(INCPATH)
|
||||
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
|
||||
test: libtommath.a demo/demo.o
|
||||
$(CC) demo/demo.o libtommath.a -o test
|
||||
$(CC) $(CFLAGS) demo/demo.o libtommath.a -o test
|
||||
|
||||
mtest: test
|
||||
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s
|
||||
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest
|
||||
|
||||
timing: libtommath.a
|
||||
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s
|
||||
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest
|
||||
|
||||
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
|
||||
docdvi: tommath.src
|
||||
@@ -145,11 +151,11 @@ pretty:
|
||||
perl pretty.build
|
||||
|
||||
clean:
|
||||
-rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
|
||||
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex `find . -type f | grep [~] | xargs` *.lo *.la
|
||||
-rm -rf .libs
|
||||
-cd etc && make clean
|
||||
-cd pics && make clean
|
||||
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
|
||||
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex *.lo *.la
|
||||
rm -rf .libs
|
||||
cd etc && make clean
|
||||
cd pics && make clean
|
||||
|
||||
zipup: clean manual poster docs
|
||||
perl gen.pl ; mv mpi.c pre_gen/ ; \
|
||||
|
||||
16
TODO
Normal file
16
TODO
Normal file
@@ -0,0 +1,16 @@
|
||||
things for book in order of importance...
|
||||
|
||||
- Fix up pseudo-code [only] for combas that are not consistent with source
|
||||
- Start in chapter 3 [basics] and work up...
|
||||
- re-write to prose [less abrupt]
|
||||
- clean up pseudo code [spacing]
|
||||
- more examples where appropriate and figures
|
||||
|
||||
Goal:
|
||||
- Get sync done by mid January [roughly 8-12 hours work]
|
||||
- Finish ch3-6 by end of January [roughly 12-16 hours of work]
|
||||
- Finish ch7-end by mid Feb [roughly 20-24 hours of work].
|
||||
|
||||
Goal isn't "first edition" but merely cleaner to read.
|
||||
|
||||
|
||||
29
bn.tex
29
bn.tex
@@ -49,7 +49,7 @@
|
||||
\begin{document}
|
||||
\frontmatter
|
||||
\pagestyle{empty}
|
||||
\title{LibTomMath User Manual \\ v0.32}
|
||||
\title{LibTomMath User Manual \\ v0.35}
|
||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||
\maketitle
|
||||
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been
|
||||
@@ -263,12 +263,12 @@ are the pros and cons of LibTomMath by comparing it to the math routines from Gn
|
||||
\begin{center}
|
||||
\begin{tabular}{|l|c|c|l|}
|
||||
\hline \textbf{Criteria} & \textbf{Pro} & \textbf{Con} & \textbf{Notes} \\
|
||||
\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath $ = 76.04$ \\
|
||||
\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath $ = 71.97$ \\
|
||||
\hline Commented function prototypes & X && GnuPG function names are cryptic. \\
|
||||
\hline Speed && X & LibTomMath is slower. \\
|
||||
\hline Totally free & X & & GPL has unfavourable restrictions.\\
|
||||
\hline Large function base & X & & GnuPG is barebones. \\
|
||||
\hline Four modular reduction algorithms & X & & Faster modular exponentiation. \\
|
||||
\hline Five modular reduction algorithms & X & & Faster modular exponentiation for a variety of moduli. \\
|
||||
\hline Portable & X & & GnuPG requires configuration to build. \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
@@ -284,9 +284,12 @@ would require when working with large integers.
|
||||
So it may feel tempting to just rip the math code out of GnuPG (or GnuMP where it was taken from originally) in your
|
||||
own application but I think there are reasons not to. While LibTomMath is slower than libraries such as GnuMP it is
|
||||
not normally significantly slower. On x86 machines the difference is normally a factor of two when performing modular
|
||||
exponentiations.
|
||||
exponentiations. It depends largely on the processor, compiler and the moduli being used.
|
||||
|
||||
Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern.
|
||||
Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern. However,
|
||||
on the other side of the coin LibTomMath offers you a totally free (public domain) well structured math library
|
||||
that is very flexible, complete and performs well in resource contrained environments. Fast RSA for example can
|
||||
be performed with as little as 8KB of ram for data (again depending on build options).
|
||||
|
||||
\chapter{Getting Started with LibTomMath}
|
||||
\section{Building Programs}
|
||||
@@ -809,7 +812,7 @@ mp\_int variables based on their digits only.
|
||||
|
||||
\index{mp\_cmp\_mag}
|
||||
\begin{alltt}
|
||||
int mp_cmp(mp_int * a, mp_int * b);
|
||||
int mp_cmp_mag(mp_int * a, mp_int * b);
|
||||
\end{alltt}
|
||||
This will compare $a$ to $b$ placing $a$ to the left of $b$. This function cannot fail and will return one of the
|
||||
three compare codes listed in figure \ref{fig:CMP}.
|
||||
@@ -1220,12 +1223,13 @@ int mp_sqr (mp_int * a, mp_int * b);
|
||||
\end{alltt}
|
||||
|
||||
Will square $a$ and store it in $b$. Like the case of multiplication there are four different squaring
|
||||
algorithms all which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms.
|
||||
algorithms all which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms because
|
||||
of the speed difference.
|
||||
|
||||
\section{Tuning Polynomial Basis Routines}
|
||||
|
||||
Both of the Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that
|
||||
the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectfully they require
|
||||
the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectively they require
|
||||
considerably less work. For example, a 10000-digit multiplication would take roughly 724,000 single precision
|
||||
multiplications with Toom-Cook or 100,000,000 single precision multiplications with the standard Comba (a factor
|
||||
of 138).
|
||||
@@ -1297,14 +1301,14 @@ of $b$. This algorithm accepts an input $a$ of any range and is not limited by
|
||||
\section{Barrett Reduction}
|
||||
|
||||
Barrett reduction is a generic optimized reduction algorithm that requires pre--computation to achieve
|
||||
a decent speedup over straight division. First a $mu$ value must be precomputed with the following function.
|
||||
a decent speedup over straight division. First a $\mu$ value must be precomputed with the following function.
|
||||
|
||||
\index{mp\_reduce\_setup}
|
||||
\begin{alltt}
|
||||
int mp_reduce_setup(mp_int *a, mp_int *b);
|
||||
\end{alltt}
|
||||
|
||||
Given a modulus in $b$ this produces the required $mu$ value in $a$. For any given modulus this only has to
|
||||
Given a modulus in $b$ this produces the required $\mu$ value in $a$. For any given modulus this only has to
|
||||
be computed once. Modular reduction can now be performed with the following.
|
||||
|
||||
\index{mp\_reduce}
|
||||
@@ -1312,7 +1316,7 @@ be computed once. Modular reduction can now be performed with the following.
|
||||
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
|
||||
\end{alltt}
|
||||
|
||||
This will reduce $a$ in place modulo $b$ with the precomputed $mu$ value in $c$. $a$ must be in the range
|
||||
This will reduce $a$ in place modulo $b$ with the precomputed $\mu$ value in $c$. $a$ must be in the range
|
||||
$0 \le a < b^2$.
|
||||
|
||||
\begin{alltt}
|
||||
@@ -1578,7 +1582,8 @@ will return $-2$.
|
||||
This algorithm uses the ``Newton Approximation'' method and will converge on the correct root fairly quickly. Since
|
||||
the algorithm requires raising $a$ to the power of $b$ it is not ideal to attempt to find roots for large
|
||||
values of $b$. If particularly large roots are required then a factor method could be used instead. For example,
|
||||
$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$.
|
||||
$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$ or simply
|
||||
$\left ( \left ( \left ( a^{1/2} \right )^{1/2} \right )^{1/2} \right )^{1/2}$
|
||||
|
||||
\chapter{Prime Numbers}
|
||||
\section{Trial Division}
|
||||
|
||||
@@ -21,8 +21,7 @@
|
||||
* Based on slow invmod except this is optimized for the case where b is
|
||||
* odd as per HAC Note 14.64 on pp. 610
|
||||
*/
|
||||
int
|
||||
fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
|
||||
int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
|
||||
{
|
||||
mp_int x, y, u, v, B, D;
|
||||
int res, neg;
|
||||
@@ -39,20 +38,20 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
|
||||
|
||||
/* x == modulus, y == value to invert */
|
||||
if ((res = mp_copy (b, &x)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* we need y = |a| */
|
||||
if ((res = mp_abs (a, &y)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
|
||||
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
mp_set (&D, 1);
|
||||
|
||||
@@ -61,17 +60,17 @@ top:
|
||||
while (mp_iseven (&u) == 1) {
|
||||
/* 4.1 u = u/2 */
|
||||
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
/* 4.2 if B is odd then */
|
||||
if (mp_isodd (&B) == 1) {
|
||||
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
/* B = B/2 */
|
||||
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,18 +78,18 @@ top:
|
||||
while (mp_iseven (&v) == 1) {
|
||||
/* 5.1 v = v/2 */
|
||||
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
/* 5.2 if D is odd then */
|
||||
if (mp_isodd (&D) == 1) {
|
||||
/* D = (D-x)/2 */
|
||||
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
/* D = D/2 */
|
||||
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,20 +97,20 @@ top:
|
||||
if (mp_cmp (&u, &v) != MP_LT) {
|
||||
/* u = u - v, B = B - D */
|
||||
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
} else {
|
||||
/* v - v - u, D = D - B */
|
||||
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,21 +124,21 @@ top:
|
||||
/* if v != 1 then there is no inverse */
|
||||
if (mp_cmp_d (&v, 1) != MP_EQ) {
|
||||
res = MP_VAL;
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* b is now the inverse */
|
||||
neg = a->sign;
|
||||
while (D.sign == MP_NEG) {
|
||||
if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
mp_exch (&D, c);
|
||||
c->sign = neg;
|
||||
res = MP_OKAY;
|
||||
|
||||
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
|
||||
LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -23,8 +23,7 @@
|
||||
*
|
||||
* Based on Algorithm 14.32 on pp.601 of HAC.
|
||||
*/
|
||||
int
|
||||
fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
|
||||
int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
|
||||
{
|
||||
int ix, res, olduse;
|
||||
mp_word W[MP_WARRAY];
|
||||
|
||||
@@ -31,8 +31,7 @@
|
||||
* Based on Algorithm 14.12 on pp.595 of HAC.
|
||||
*
|
||||
*/
|
||||
int
|
||||
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
{
|
||||
int olduse, res, pa, ix, iz;
|
||||
mp_digit W[MP_WARRAY];
|
||||
@@ -50,7 +49,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
|
||||
/* clear the carry */
|
||||
_W = 0;
|
||||
for (ix = 0; ix <= pa; ix++) {
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
int tx, ty;
|
||||
int iy;
|
||||
mp_digit *tmpx, *tmpy;
|
||||
@@ -63,7 +62,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
tmpx = a->dp + tx;
|
||||
tmpy = b->dp + ty;
|
||||
|
||||
/* this is the number of times the loop will iterrate, essentially its
|
||||
/* this is the number of times the loop will iterrate, essentially
|
||||
while (tx++ < a->used && ty-- >= 0) { ... }
|
||||
*/
|
||||
iy = MIN(a->used-tx, ty+1);
|
||||
@@ -80,14 +79,17 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
_W = _W >> ((mp_word)DIGIT_BIT);
|
||||
}
|
||||
|
||||
/* store final carry */
|
||||
W[ix] = (mp_digit)(_W & MP_MASK);
|
||||
|
||||
/* setup dest */
|
||||
olduse = c->used;
|
||||
c->used = digs;
|
||||
c->used = pa;
|
||||
|
||||
{
|
||||
register mp_digit *tmpc;
|
||||
tmpc = c->dp;
|
||||
for (ix = 0; ix < digs; ix++) {
|
||||
for (ix = 0; ix < pa+1; ix++) {
|
||||
/* now extract the previous digit [below the carry] */
|
||||
*tmpc++ = W[ix];
|
||||
}
|
||||
|
||||
@@ -24,8 +24,7 @@
|
||||
*
|
||||
* Based on Algorithm 14.12 on pp.595 of HAC.
|
||||
*/
|
||||
int
|
||||
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
{
|
||||
int olduse, res, pa, ix, iz;
|
||||
mp_digit W[MP_WARRAY];
|
||||
@@ -42,7 +41,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
/* number of output digits to produce */
|
||||
pa = a->used + b->used;
|
||||
_W = 0;
|
||||
for (ix = digs; ix <= pa; ix++) {
|
||||
for (ix = digs; ix < pa; ix++) {
|
||||
int tx, ty, iy;
|
||||
mp_digit *tmpx, *tmpy;
|
||||
|
||||
@@ -70,6 +69,9 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
/* make next carry */
|
||||
_W = _W >> ((mp_word)DIGIT_BIT);
|
||||
}
|
||||
|
||||
/* store final carry */
|
||||
W[ix] = (mp_digit)(_W & MP_MASK);
|
||||
|
||||
/* setup dest */
|
||||
olduse = c->used;
|
||||
|
||||
@@ -15,33 +15,14 @@
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* fast squaring
|
||||
*
|
||||
* This is the comba method where the columns of the product
|
||||
* are computed first then the carries are computed. This
|
||||
* has the effect of making a very simple inner loop that
|
||||
* is executed the most
|
||||
*
|
||||
* W2 represents the outer products and W the inner.
|
||||
*
|
||||
* A further optimizations is made because the inner
|
||||
* products are of the form "A * B * 2". The *2 part does
|
||||
* not need to be computed until the end which is good
|
||||
* because 64-bit shifts are slow!
|
||||
*
|
||||
* Based on Algorithm 14.16 on pp.597 of HAC.
|
||||
*
|
||||
*/
|
||||
/* the jist of squaring...
|
||||
|
||||
you do like mult except the offset of the tmpx [one that starts closer to zero]
|
||||
can't equal the offset of tmpy. So basically you set up iy like before then you min it with
|
||||
(ty-tx) so that it never happens. You double all those you add in the inner loop
|
||||
* you do like mult except the offset of the tmpx [one that
|
||||
* starts closer to zero] can't equal the offset of tmpy.
|
||||
* So basically you set up iy like before then you min it with
|
||||
* (ty-tx) so that it never happens. You double all those
|
||||
* you add in the inner loop
|
||||
|
||||
After that loop you do the squares and add them in.
|
||||
|
||||
Remove W2 and don't memset W
|
||||
|
||||
*/
|
||||
|
||||
int fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
@@ -60,7 +41,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
|
||||
/* number of output digits to produce */
|
||||
W1 = 0;
|
||||
for (ix = 0; ix <= pa; ix++) {
|
||||
for (ix = 0; ix < pa; ix++) {
|
||||
int tx, ty, iy;
|
||||
mp_word _W;
|
||||
mp_digit *tmpy;
|
||||
@@ -76,7 +57,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
tmpx = a->dp + tx;
|
||||
tmpy = a->dp + ty;
|
||||
|
||||
/* this is the number of times the loop will iterrate, essentially its
|
||||
/* this is the number of times the loop will iterrate, essentially
|
||||
while (tx++ < a->used && ty-- >= 0) { ... }
|
||||
*/
|
||||
iy = MIN(a->used-tx, ty+1);
|
||||
@@ -101,7 +82,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
|
||||
}
|
||||
|
||||
/* store it */
|
||||
W[ix] = _W;
|
||||
W[ix] = (mp_digit)(_W & MP_MASK);
|
||||
|
||||
/* make next carry */
|
||||
W1 = _W >> ((mp_word)DIGIT_BIT);
|
||||
|
||||
56
bn_mp_div.c
56
bn_mp_div.c
@@ -49,23 +49,23 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
|
||||
mp_set(&tq, 1);
|
||||
n = mp_count_bits(a) - mp_count_bits(b);
|
||||
if (((res = mp_copy(a, &ta)) != MP_OKAY) ||
|
||||
((res = mp_copy(b, &tb)) != MP_OKAY) ||
|
||||
if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
|
||||
((res = mp_abs(b, &tb)) != MP_OKAY) ||
|
||||
((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
|
||||
((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
while (n-- >= 0) {
|
||||
if (mp_cmp(&tb, &ta) != MP_GT) {
|
||||
if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
|
||||
((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
|
||||
((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,13 +74,13 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
|
||||
if (c != NULL) {
|
||||
mp_exch(c, &q);
|
||||
c->sign = n2;
|
||||
c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
|
||||
}
|
||||
if (d != NULL) {
|
||||
mp_exch(d, &ta);
|
||||
d->sign = n;
|
||||
d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
|
||||
}
|
||||
__ERR:
|
||||
LBL_ERR:
|
||||
mp_clear_multi(&ta, &tb, &tq, &q, NULL);
|
||||
return res;
|
||||
}
|
||||
@@ -129,19 +129,19 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
q.used = a->used + 2;
|
||||
|
||||
if ((res = mp_init (&t1)) != MP_OKAY) {
|
||||
goto __Q;
|
||||
goto LBL_Q;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&t2)) != MP_OKAY) {
|
||||
goto __T1;
|
||||
goto LBL_T1;
|
||||
}
|
||||
|
||||
if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
|
||||
goto __T2;
|
||||
goto LBL_T2;
|
||||
}
|
||||
|
||||
if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
|
||||
goto __X;
|
||||
goto LBL_X;
|
||||
}
|
||||
|
||||
/* fix the sign */
|
||||
@@ -153,10 +153,10 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
if (norm < (int)(DIGIT_BIT-1)) {
|
||||
norm = (DIGIT_BIT-1) - norm;
|
||||
if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
} else {
|
||||
norm = 0;
|
||||
@@ -168,13 +168,13 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
|
||||
/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
|
||||
if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
while (mp_cmp (&x, &y) != MP_LT) {
|
||||
++(q.dp[n - t]);
|
||||
if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
t1.dp[1] = y.dp[t];
|
||||
t1.used = 2;
|
||||
if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
/* find right hand */
|
||||
@@ -228,27 +228,27 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
|
||||
/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
|
||||
if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
|
||||
if (x.sign == MP_NEG) {
|
||||
if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
|
||||
@@ -275,11 +275,11 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
|
||||
|
||||
res = MP_OKAY;
|
||||
|
||||
__Y:mp_clear (&y);
|
||||
__X:mp_clear (&x);
|
||||
__T2:mp_clear (&t2);
|
||||
__T1:mp_clear (&t1);
|
||||
__Q:mp_clear (&q);
|
||||
LBL_Y:mp_clear (&y);
|
||||
LBL_X:mp_clear (&x);
|
||||
LBL_T2:mp_clear (&t2);
|
||||
LBL_T1:mp_clear (&t1);
|
||||
LBL_Q:mp_clear (&q);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
* Based on algorithm from the paper
|
||||
*
|
||||
* "Generating Efficient Primes for Discrete Log Cryptosystems"
|
||||
* Chae Hoon Lim, Pil Loong Lee,
|
||||
* Chae Hoon Lim, Pil Joong Lee,
|
||||
* POSTECH Information Research Laboratories
|
||||
*
|
||||
* The modulus must be of a special format [see manual]
|
||||
|
||||
@@ -61,25 +61,33 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
return err;
|
||||
#else
|
||||
/* no invmod */
|
||||
return MP_VAL
|
||||
return MP_VAL;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* modified diminished radix reduction */
|
||||
#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C)
|
||||
if (mp_reduce_is_2k_l(P) == MP_YES) {
|
||||
return s_mp_exptmod(G, X, P, Y, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BN_MP_DR_IS_MODULUS_C
|
||||
/* is it a DR modulus? */
|
||||
dr = mp_dr_is_modulus(P);
|
||||
#else
|
||||
/* default to no */
|
||||
dr = 0;
|
||||
#endif
|
||||
|
||||
#ifdef BN_MP_REDUCE_IS_2K_C
|
||||
/* if not, is it a uDR modulus? */
|
||||
/* if not, is it a unrestricted DR modulus? */
|
||||
if (dr == 0) {
|
||||
dr = mp_reduce_is_2k(P) << 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* if the modulus is odd or dr != 0 use the fast method */
|
||||
/* if the modulus is odd or dr != 0 use the montgomery method */
|
||||
#ifdef BN_MP_EXPTMOD_FAST_C
|
||||
if (mp_isodd (P) == 1 || dr != 0) {
|
||||
return mp_exptmod_fast (G, X, P, Y, dr);
|
||||
@@ -87,7 +95,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
#endif
|
||||
#ifdef BN_S_MP_EXPTMOD_C
|
||||
/* otherwise use the generic Barrett reduction technique */
|
||||
return s_mp_exptmod (G, X, P, Y);
|
||||
return s_mp_exptmod (G, X, P, Y, 0);
|
||||
#else
|
||||
/* no exptmod for evens */
|
||||
return MP_VAL;
|
||||
|
||||
@@ -29,8 +29,7 @@
|
||||
#define TAB_SIZE 256
|
||||
#endif
|
||||
|
||||
int
|
||||
mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
{
|
||||
mp_int M[TAB_SIZE], res;
|
||||
mp_digit buf, mp;
|
||||
@@ -88,11 +87,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
#ifdef BN_MP_MONTGOMERY_SETUP_C
|
||||
/* now setup montgomery */
|
||||
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
}
|
||||
#else
|
||||
err = MP_VAL;
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
#endif
|
||||
|
||||
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
|
||||
@@ -108,7 +107,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
redux = mp_montgomery_reduce;
|
||||
#else
|
||||
err = MP_VAL;
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
#endif
|
||||
}
|
||||
} else if (redmode == 1) {
|
||||
@@ -118,24 +117,24 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
redux = mp_dr_reduce;
|
||||
#else
|
||||
err = MP_VAL;
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
#endif
|
||||
} else {
|
||||
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
|
||||
/* setup DR reduction for moduli of the form 2**k - b */
|
||||
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
}
|
||||
redux = mp_reduce_2k;
|
||||
#else
|
||||
err = MP_VAL;
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* setup result */
|
||||
if ((err = mp_init (&res)) != MP_OKAY) {
|
||||
goto __M;
|
||||
goto LBL_M;
|
||||
}
|
||||
|
||||
/* create M table
|
||||
@@ -149,45 +148,45 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
|
||||
/* now we need R mod m */
|
||||
if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
#else
|
||||
err = MP_VAL;
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
#endif
|
||||
|
||||
/* now set M[1] to G * R mod m */
|
||||
if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
} else {
|
||||
mp_set(&res, 1);
|
||||
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
|
||||
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
|
||||
for (x = 0; x < (winsize - 1); x++) {
|
||||
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* create upper table */
|
||||
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
|
||||
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,10 +226,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
/* if the bit is zero and mode == 1 then we square */
|
||||
if (mode == 1 && y == 0) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -244,19 +243,19 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
/* square first */
|
||||
for (x = 0; x < winsize; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
|
||||
/* empty window and reset */
|
||||
@@ -271,10 +270,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
/* square then multiply if the bit is set */
|
||||
for (x = 0; x < bitcpy; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
|
||||
/* get next bit of the window */
|
||||
@@ -282,10 +281,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
if ((bitbuf & (1 << winsize)) != 0) {
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = redux (&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -299,15 +298,15 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
* of R.
|
||||
*/
|
||||
if ((err = redux(&res, P, mp)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* swap res with Y */
|
||||
mp_exch (&res, Y);
|
||||
err = MP_OKAY;
|
||||
__RES:mp_clear (&res);
|
||||
__M:
|
||||
LBL_RES:mp_clear (&res);
|
||||
LBL_M:
|
||||
mp_clear(&M[1]);
|
||||
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
|
||||
mp_clear (&M[x]);
|
||||
|
||||
@@ -59,6 +59,13 @@ int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
|
||||
if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; }
|
||||
}
|
||||
|
||||
/* make sure U3 >= 0 */
|
||||
if (u3.sign == MP_NEG) {
|
||||
mp_neg(&u1, &u1);
|
||||
mp_neg(&u2, &u2);
|
||||
mp_neg(&u3, &u3);
|
||||
}
|
||||
|
||||
/* copy result out */
|
||||
if (U1 != NULL) { mp_exch(U1, &u1); }
|
||||
if (U2 != NULL) { mp_exch(U2, &u2); }
|
||||
|
||||
20
bn_mp_gcd.c
20
bn_mp_gcd.c
@@ -43,7 +43,7 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
|
||||
}
|
||||
|
||||
if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
|
||||
goto __U;
|
||||
goto LBL_U;
|
||||
}
|
||||
|
||||
/* must be positive for the remainder of the algorithm */
|
||||
@@ -57,24 +57,24 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
|
||||
if (k > 0) {
|
||||
/* divide the power of two out */
|
||||
if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
|
||||
if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
}
|
||||
|
||||
/* divide any remaining factors of two out */
|
||||
if (u_lsb != k) {
|
||||
if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
}
|
||||
|
||||
if (v_lsb != k) {
|
||||
if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,23 +87,23 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
|
||||
|
||||
/* subtract smallest from largest */
|
||||
if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
|
||||
/* Divide out all factors of two */
|
||||
if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
}
|
||||
|
||||
/* multiply by 2**k which we divided out at the beginning */
|
||||
if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
c->sign = MP_ZPOS;
|
||||
res = MP_OKAY;
|
||||
__V:mp_clear (&u);
|
||||
__U:mp_clear (&v);
|
||||
LBL_V:mp_clear (&u);
|
||||
LBL_U:mp_clear (&v);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -33,25 +33,25 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
|
||||
}
|
||||
|
||||
/* x = a, y = b */
|
||||
if ((res = mp_copy (a, &x)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_copy (b, &y)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* 2. [modified] if x,y are both even then return an error! */
|
||||
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
|
||||
res = MP_VAL;
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
|
||||
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
mp_set (&A, 1);
|
||||
mp_set (&D, 1);
|
||||
@@ -61,24 +61,24 @@ top:
|
||||
while (mp_iseven (&u) == 1) {
|
||||
/* 4.1 u = u/2 */
|
||||
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
/* 4.2 if A or B is odd then */
|
||||
if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
|
||||
/* A = (A+y)/2, B = (B-x)/2 */
|
||||
if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
/* A = A/2, B = B/2 */
|
||||
if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,24 +86,24 @@ top:
|
||||
while (mp_iseven (&v) == 1) {
|
||||
/* 5.1 v = v/2 */
|
||||
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
/* 5.2 if C or D is odd then */
|
||||
if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
|
||||
/* C = (C+y)/2, D = (D-x)/2 */
|
||||
if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
/* C = C/2, D = D/2 */
|
||||
if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,28 +111,28 @@ top:
|
||||
if (mp_cmp (&u, &v) != MP_LT) {
|
||||
/* u = u - v, A = A - C, B = B - D */
|
||||
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
} else {
|
||||
/* v - v - u, C = C - A, D = D - B */
|
||||
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,27 +145,27 @@ top:
|
||||
/* if v != 1 then there is no inverse */
|
||||
if (mp_cmp_d (&v, 1) != MP_EQ) {
|
||||
res = MP_VAL;
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* if its too low */
|
||||
while (mp_cmp_d(&C, 0) == MP_LT) {
|
||||
if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
/* too big */
|
||||
while (mp_cmp_mag(&C, b) != MP_LT) {
|
||||
if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
/* C is now the inverse */
|
||||
mp_exch (&C, c);
|
||||
res = MP_OKAY;
|
||||
__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
|
||||
LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -50,13 +50,13 @@ int mp_jacobi (mp_int * a, mp_int * p, int *c)
|
||||
}
|
||||
|
||||
if ((res = mp_init (&p1)) != MP_OKAY) {
|
||||
goto __A1;
|
||||
goto LBL_A1;
|
||||
}
|
||||
|
||||
/* divide out larger power of two */
|
||||
k = mp_cnt_lsb(&a1);
|
||||
if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
|
||||
goto __P1;
|
||||
goto LBL_P1;
|
||||
}
|
||||
|
||||
/* step 4. if e is even set s=1 */
|
||||
@@ -84,18 +84,18 @@ int mp_jacobi (mp_int * a, mp_int * p, int *c)
|
||||
} else {
|
||||
/* n1 = n mod a1 */
|
||||
if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
|
||||
goto __P1;
|
||||
goto LBL_P1;
|
||||
}
|
||||
if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
|
||||
goto __P1;
|
||||
goto LBL_P1;
|
||||
}
|
||||
*c = s * r;
|
||||
}
|
||||
|
||||
/* done */
|
||||
res = MP_OKAY;
|
||||
__P1:mp_clear (&p1);
|
||||
__A1:mp_clear (&a1);
|
||||
LBL_P1:mp_clear (&p1);
|
||||
LBL_A1:mp_clear (&a1);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -28,20 +28,20 @@ int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
|
||||
|
||||
/* t1 = get the GCD of the two inputs */
|
||||
if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
|
||||
goto __T;
|
||||
goto LBL_T;
|
||||
}
|
||||
|
||||
/* divide the smallest by the GCD */
|
||||
if (mp_cmp_mag(a, b) == MP_LT) {
|
||||
/* store quotient in t2 such that t2 * b is the LCM */
|
||||
if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
|
||||
goto __T;
|
||||
goto LBL_T;
|
||||
}
|
||||
res = mp_mul(b, &t2, c);
|
||||
} else {
|
||||
/* store quotient in t2 such that t2 * a is the LCM */
|
||||
if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
|
||||
goto __T;
|
||||
goto LBL_T;
|
||||
}
|
||||
res = mp_mul(a, &t2, c);
|
||||
}
|
||||
@@ -49,7 +49,7 @@ int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
|
||||
/* fix the sign to positive */
|
||||
c->sign = MP_ZPOS;
|
||||
|
||||
__T:
|
||||
LBL_T:
|
||||
mp_clear_multi (&t1, &t2, NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ mp_mod_2d (mp_int * a, int b, mp_int * c)
|
||||
}
|
||||
|
||||
/* if the modulus is larger than the value than return */
|
||||
if (b > (int) (a->used * DIGIT_BIT)) {
|
||||
if (b >= (int) (a->used * DIGIT_BIT)) {
|
||||
res = mp_copy (a, c);
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
|
||||
/* how many bits of last digit does b use */
|
||||
bits = mp_count_bits (b) % DIGIT_BIT;
|
||||
|
||||
|
||||
if (b->used > 1) {
|
||||
if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
|
||||
return res;
|
||||
|
||||
@@ -57,8 +57,9 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
|
||||
u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
|
||||
}
|
||||
|
||||
/* store final carry [if any] */
|
||||
/* store final carry [if any] and increment ix offset */
|
||||
*tmpc++ = u;
|
||||
++ix;
|
||||
|
||||
/* now zero digits above the top */
|
||||
while (ix++ < olduse) {
|
||||
|
||||
@@ -40,11 +40,11 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
|
||||
}
|
||||
|
||||
if ((res = mp_init (&t2)) != MP_OKAY) {
|
||||
goto __T1;
|
||||
goto LBL_T1;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&t3)) != MP_OKAY) {
|
||||
goto __T2;
|
||||
goto LBL_T2;
|
||||
}
|
||||
|
||||
/* if a is negative fudge the sign but keep track */
|
||||
@@ -57,52 +57,52 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
|
||||
do {
|
||||
/* t1 = t2 */
|
||||
if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
/* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
|
||||
|
||||
/* t3 = t1**(b-1) */
|
||||
if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
/* numerator */
|
||||
/* t2 = t1**b */
|
||||
if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
/* t2 = t1**b - a */
|
||||
if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
/* denominator */
|
||||
/* t3 = t1**(b-1) * b */
|
||||
if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
/* t3 = (t1**b - a)/(b * t1**(b-1)) */
|
||||
if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
} while (mp_cmp (&t1, &t2) != MP_EQ);
|
||||
|
||||
/* result can be off by a few so check */
|
||||
for (;;) {
|
||||
if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
|
||||
if (mp_cmp (&t2, a) == MP_GT) {
|
||||
if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
|
||||
goto __T3;
|
||||
goto LBL_T3;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
@@ -120,9 +120,9 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
|
||||
|
||||
res = MP_OKAY;
|
||||
|
||||
__T3:mp_clear (&t3);
|
||||
__T2:mp_clear (&t2);
|
||||
__T1:mp_clear (&t1);
|
||||
LBL_T3:mp_clear (&t3);
|
||||
LBL_T2:mp_clear (&t2);
|
||||
LBL_T1:mp_clear (&t1);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
10
bn_mp_neg.c
10
bn_mp_neg.c
@@ -19,12 +19,18 @@
|
||||
int mp_neg (mp_int * a, mp_int * b)
|
||||
{
|
||||
int res;
|
||||
if ((res = mp_copy (a, b)) != MP_OKAY) {
|
||||
return res;
|
||||
if (a != b) {
|
||||
if ((res = mp_copy (a, b)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
if (mp_iszero(b) != MP_YES) {
|
||||
b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
|
||||
} else {
|
||||
b->sign = MP_ZPOS;
|
||||
}
|
||||
|
||||
return MP_OKAY;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -43,7 +43,7 @@ int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
|
||||
|
||||
/* compute t = b**a mod a */
|
||||
if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
|
||||
goto __T;
|
||||
goto LBL_T;
|
||||
}
|
||||
|
||||
/* is it equal to b? */
|
||||
@@ -52,7 +52,7 @@ int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
|
||||
}
|
||||
|
||||
err = MP_OKAY;
|
||||
__T:mp_clear (&t);
|
||||
LBL_T:mp_clear (&t);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -29,8 +29,8 @@ int mp_prime_is_divisible (mp_int * a, int *result)
|
||||
*result = MP_NO;
|
||||
|
||||
for (ix = 0; ix < PRIME_SIZE; ix++) {
|
||||
/* what is a mod __prime_tab[ix] */
|
||||
if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
|
||||
/* what is a mod LBL_prime_tab[ix] */
|
||||
if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
|
||||
|
||||
/* is the input equal to one of the primes in the table? */
|
||||
for (ix = 0; ix < PRIME_SIZE; ix++) {
|
||||
if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
|
||||
if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
|
||||
*result = 1;
|
||||
return MP_OKAY;
|
||||
}
|
||||
@@ -60,20 +60,20 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
|
||||
|
||||
for (ix = 0; ix < t; ix++) {
|
||||
/* set the prime */
|
||||
mp_set (&b, __prime_tab[ix]);
|
||||
mp_set (&b, ltm_prime_tab[ix]);
|
||||
|
||||
if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
|
||||
goto __B;
|
||||
goto LBL_B;
|
||||
}
|
||||
|
||||
if (res == MP_NO) {
|
||||
goto __B;
|
||||
goto LBL_B;
|
||||
}
|
||||
}
|
||||
|
||||
/* passed the test */
|
||||
*result = MP_YES;
|
||||
__B:mp_clear (&b);
|
||||
LBL_B:mp_clear (&b);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -40,12 +40,12 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
|
||||
return err;
|
||||
}
|
||||
if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
|
||||
goto __N1;
|
||||
goto LBL_N1;
|
||||
}
|
||||
|
||||
/* set 2**s * r = n1 */
|
||||
if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
|
||||
goto __N1;
|
||||
goto LBL_N1;
|
||||
}
|
||||
|
||||
/* count the number of least significant bits
|
||||
@@ -55,15 +55,15 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
|
||||
|
||||
/* now divide n - 1 by 2**s */
|
||||
if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
|
||||
goto __R;
|
||||
goto LBL_R;
|
||||
}
|
||||
|
||||
/* compute y = b**r mod a */
|
||||
if ((err = mp_init (&y)) != MP_OKAY) {
|
||||
goto __R;
|
||||
goto LBL_R;
|
||||
}
|
||||
if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
/* if y != 1 and y != n1 do */
|
||||
@@ -72,12 +72,12 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
|
||||
/* while j <= s-1 and y != n1 */
|
||||
while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
|
||||
if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
/* if y == 1 then composite */
|
||||
if (mp_cmp_d (&y, 1) == MP_EQ) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
++j;
|
||||
@@ -85,15 +85,15 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
|
||||
|
||||
/* if y != n1 then composite */
|
||||
if (mp_cmp (&y, &n1) != MP_EQ) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
}
|
||||
|
||||
/* probably prime now */
|
||||
*result = MP_YES;
|
||||
__Y:mp_clear (&y);
|
||||
__R:mp_clear (&r);
|
||||
__N1:mp_clear (&n1);
|
||||
LBL_Y:mp_clear (&y);
|
||||
LBL_R:mp_clear (&r);
|
||||
LBL_N1:mp_clear (&n1);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -35,10 +35,10 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
a->sign = MP_ZPOS;
|
||||
|
||||
/* simple algo if a is less than the largest prime in the table */
|
||||
if (mp_cmp_d(a, __prime_tab[PRIME_SIZE-1]) == MP_LT) {
|
||||
if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
|
||||
/* find which prime it is bigger than */
|
||||
for (x = PRIME_SIZE - 2; x >= 0; x--) {
|
||||
if (mp_cmp_d(a, __prime_tab[x]) != MP_LT) {
|
||||
if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
|
||||
if (bbs_style == 1) {
|
||||
/* ok we found a prime smaller or
|
||||
* equal [so the next is larger]
|
||||
@@ -46,17 +46,17 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
* however, the prime must be
|
||||
* congruent to 3 mod 4
|
||||
*/
|
||||
if ((__prime_tab[x + 1] & 3) != 3) {
|
||||
if ((ltm_prime_tab[x + 1] & 3) != 3) {
|
||||
/* scan upwards for a prime congruent to 3 mod 4 */
|
||||
for (y = x + 1; y < PRIME_SIZE; y++) {
|
||||
if ((__prime_tab[y] & 3) == 3) {
|
||||
mp_set(a, __prime_tab[y]);
|
||||
if ((ltm_prime_tab[y] & 3) == 3) {
|
||||
mp_set(a, ltm_prime_tab[y]);
|
||||
return MP_OKAY;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mp_set(a, __prime_tab[x + 1]);
|
||||
mp_set(a, ltm_prime_tab[x + 1]);
|
||||
return MP_OKAY;
|
||||
}
|
||||
}
|
||||
@@ -94,7 +94,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
|
||||
/* generate the restable */
|
||||
for (x = 1; x < PRIME_SIZE; x++) {
|
||||
if ((err = mp_mod_d(a, __prime_tab[x], res_tab + x)) != MP_OKAY) {
|
||||
if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -120,8 +120,8 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
res_tab[x] += kstep;
|
||||
|
||||
/* subtract the modulus [instead of using division] */
|
||||
if (res_tab[x] >= __prime_tab[x]) {
|
||||
res_tab[x] -= __prime_tab[x];
|
||||
if (res_tab[x] >= ltm_prime_tab[x]) {
|
||||
res_tab[x] -= ltm_prime_tab[x];
|
||||
}
|
||||
|
||||
/* set flag if zero */
|
||||
@@ -133,7 +133,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
|
||||
/* add the step */
|
||||
if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
|
||||
/* if didn't pass sieve and step == MAX then skip test */
|
||||
@@ -143,9 +143,9 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
|
||||
/* is this prime? */
|
||||
for (x = 0; x < t; x++) {
|
||||
mp_set(&b, __prime_tab[t]);
|
||||
mp_set(&b, ltm_prime_tab[t]);
|
||||
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
|
||||
goto __ERR;
|
||||
goto LBL_ERR;
|
||||
}
|
||||
if (res == MP_NO) {
|
||||
break;
|
||||
@@ -158,7 +158,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
|
||||
}
|
||||
|
||||
err = MP_OKAY;
|
||||
__ERR:
|
||||
LBL_ERR:
|
||||
mp_clear(&b);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
|
||||
}
|
||||
|
||||
/* calc the byte size */
|
||||
bsize = (size>>3)+(size&7?1:0);
|
||||
bsize = (size>>3) + ((size&7)?1:0);
|
||||
|
||||
/* we need a buffer of bsize bytes */
|
||||
tmp = OPT_CAST(unsigned char) XMALLOC(bsize);
|
||||
@@ -56,19 +56,19 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
|
||||
}
|
||||
|
||||
/* calc the maskAND value for the MSbyte*/
|
||||
maskAND = 0xFF >> (8 - (size & 7));
|
||||
maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
|
||||
|
||||
/* calc the maskOR_msb */
|
||||
maskOR_msb = 0;
|
||||
maskOR_msb_offset = (size - 2) >> 3;
|
||||
maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
|
||||
if (flags & LTM_PRIME_2MSB_ON) {
|
||||
maskOR_msb |= 1 << ((size - 2) & 7);
|
||||
} else if (flags & LTM_PRIME_2MSB_OFF) {
|
||||
maskAND &= ~(1 << ((size - 2) & 7));
|
||||
}
|
||||
}
|
||||
|
||||
/* get the maskOR_lsb */
|
||||
maskOR_lsb = 0;
|
||||
maskOR_lsb = 1;
|
||||
if (flags & LTM_PRIME_BBS) {
|
||||
maskOR_lsb |= 3;
|
||||
}
|
||||
|
||||
@@ -35,22 +35,29 @@ int mp_radix_size (mp_int * a, int radix, int *size)
|
||||
return MP_VAL;
|
||||
}
|
||||
|
||||
/* init a copy of the input */
|
||||
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
|
||||
return res;
|
||||
if (mp_iszero(a) == MP_YES) {
|
||||
*size = 2;
|
||||
return MP_OKAY;
|
||||
}
|
||||
|
||||
/* digs is the digit count */
|
||||
digs = 0;
|
||||
|
||||
/* if it's negative add one for the sign */
|
||||
if (t.sign == MP_NEG) {
|
||||
if (a->sign == MP_NEG) {
|
||||
++digs;
|
||||
t.sign = MP_ZPOS;
|
||||
}
|
||||
|
||||
/* init a copy of the input */
|
||||
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* force temp to positive */
|
||||
t.sign = MP_ZPOS;
|
||||
|
||||
/* fetch out all of the digits */
|
||||
while (mp_iszero (&t) == 0) {
|
||||
while (mp_iszero (&t) == MP_NO) {
|
||||
if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
|
||||
mp_clear (&t);
|
||||
return res;
|
||||
|
||||
@@ -29,14 +29,14 @@ mp_rand (mp_int * a, int digits)
|
||||
|
||||
/* first place a random non-zero digit */
|
||||
do {
|
||||
d = ((mp_digit) abs (rand ()));
|
||||
d = ((mp_digit) abs (rand ())) & MP_MASK;
|
||||
} while (d == 0);
|
||||
|
||||
if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
while (digits-- > 0) {
|
||||
while (--digits > 0) {
|
||||
if ((res = mp_lshd (a, 1)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
*/
|
||||
|
||||
/* read a string [ASCII] in a given radix */
|
||||
int mp_read_radix (mp_int * a, char *str, int radix)
|
||||
int mp_read_radix (mp_int * a, const char *str, int radix)
|
||||
{
|
||||
int y, res, neg;
|
||||
char ch;
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
* precomputed via mp_reduce_setup.
|
||||
* From HAC pp.604 Algorithm 14.42
|
||||
*/
|
||||
int
|
||||
mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
{
|
||||
mp_int q;
|
||||
int res, um = m->used;
|
||||
@@ -40,11 +39,11 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
|
||||
}
|
||||
} else {
|
||||
#ifdef BN_S_MP_MUL_HIGH_DIGS_C
|
||||
if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
|
||||
if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
|
||||
if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
|
||||
if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* reduces a modulo n where n is of the form 2**p - d */
|
||||
int
|
||||
mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
|
||||
int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
|
||||
{
|
||||
mp_int q;
|
||||
int p, res;
|
||||
|
||||
58
bn_mp_reduce_2k_l.c
Normal file
58
bn_mp_reduce_2k_l.c
Normal file
@@ -0,0 +1,58 @@
|
||||
#include <tommath.h>
|
||||
#ifdef BN_MP_REDUCE_2K_L_C
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is a library that provides multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library was designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* reduces a modulo n where n is of the form 2**p - d
|
||||
This differs from reduce_2k since "d" can be larger
|
||||
than a single digit.
|
||||
*/
|
||||
int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
|
||||
{
|
||||
mp_int q;
|
||||
int p, res;
|
||||
|
||||
if ((res = mp_init(&q)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
p = mp_count_bits(n);
|
||||
top:
|
||||
/* q = a/2**p, a = a mod 2**p */
|
||||
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* q = q * d */
|
||||
if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
/* a = a + q */
|
||||
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if (mp_cmp_mag(a, n) != MP_LT) {
|
||||
s_mp_sub(a, n, a);
|
||||
goto top;
|
||||
}
|
||||
|
||||
ERR:
|
||||
mp_clear(&q);
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* determines the setup value */
|
||||
int
|
||||
mp_reduce_2k_setup(mp_int *a, mp_digit *d)
|
||||
int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
|
||||
{
|
||||
int res, p;
|
||||
mp_int tmp;
|
||||
|
||||
40
bn_mp_reduce_2k_setup_l.c
Normal file
40
bn_mp_reduce_2k_setup_l.c
Normal file
@@ -0,0 +1,40 @@
|
||||
#include <tommath.h>
|
||||
#ifdef BN_MP_REDUCE_2K_SETUP_L_C
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is a library that provides multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library was designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* determines the setup value */
|
||||
int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
|
||||
{
|
||||
int res;
|
||||
mp_int tmp;
|
||||
|
||||
if ((res = mp_init(&tmp)) != MP_OKAY) {
|
||||
return res;
|
||||
}
|
||||
|
||||
if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
ERR:
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
@@ -22,9 +22,9 @@ int mp_reduce_is_2k(mp_int *a)
|
||||
mp_digit iz;
|
||||
|
||||
if (a->used == 0) {
|
||||
return 0;
|
||||
return MP_NO;
|
||||
} else if (a->used == 1) {
|
||||
return 1;
|
||||
return MP_YES;
|
||||
} else if (a->used > 1) {
|
||||
iy = mp_count_bits(a);
|
||||
iz = 1;
|
||||
@@ -33,7 +33,7 @@ int mp_reduce_is_2k(mp_int *a)
|
||||
/* Test every bit from the second digit up, must be 1 */
|
||||
for (ix = DIGIT_BIT; ix < iy; ix++) {
|
||||
if ((a->dp[iw] & iz) == 0) {
|
||||
return 0;
|
||||
return MP_NO;
|
||||
}
|
||||
iz <<= 1;
|
||||
if (iz > (mp_digit)MP_MASK) {
|
||||
@@ -42,7 +42,7 @@ int mp_reduce_is_2k(mp_int *a)
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
return MP_YES;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
40
bn_mp_reduce_is_2k_l.c
Normal file
40
bn_mp_reduce_is_2k_l.c
Normal file
@@ -0,0 +1,40 @@
|
||||
#include <tommath.h>
|
||||
#ifdef BN_MP_REDUCE_IS_2K_L_C
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is a library that provides multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library was designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* determines if reduce_2k_l can be used */
|
||||
int mp_reduce_is_2k_l(mp_int *a)
|
||||
{
|
||||
int ix, iy;
|
||||
|
||||
if (a->used == 0) {
|
||||
return MP_NO;
|
||||
} else if (a->used == 1) {
|
||||
return MP_YES;
|
||||
} else if (a->used > 1) {
|
||||
/* if more than half of the digits are -1 we're sold */
|
||||
for (iy = ix = 0; ix < a->used; ix++) {
|
||||
if (a->dp[ix] == MP_MASK) {
|
||||
++iy;
|
||||
}
|
||||
}
|
||||
return (iy >= (a->used/2)) ? MP_YES : MP_NO;
|
||||
|
||||
}
|
||||
return MP_NO;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* store in signed [big endian] format */
|
||||
int
|
||||
mp_to_signed_bin (mp_int * a, unsigned char *b)
|
||||
int mp_to_signed_bin (mp_int * a, unsigned char *b)
|
||||
{
|
||||
int res;
|
||||
|
||||
|
||||
27
bn_mp_to_signed_bin_n.c
Normal file
27
bn_mp_to_signed_bin_n.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <tommath.h>
|
||||
#ifdef BN_MP_TO_SIGNED_BIN_N_C
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is a library that provides multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library was designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* store in signed [big endian] format */
|
||||
int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
|
||||
{
|
||||
if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
|
||||
return MP_VAL;
|
||||
}
|
||||
*outlen = mp_signed_bin_size(a);
|
||||
return mp_to_signed_bin(a, b);
|
||||
}
|
||||
#endif
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* store in unsigned [big endian] format */
|
||||
int
|
||||
mp_to_unsigned_bin (mp_int * a, unsigned char *b)
|
||||
int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
|
||||
{
|
||||
int x, res;
|
||||
mp_int t;
|
||||
|
||||
27
bn_mp_to_unsigned_bin_n.c
Normal file
27
bn_mp_to_unsigned_bin_n.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <tommath.h>
|
||||
#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
|
||||
/* LibTomMath, multiple-precision integer library -- Tom St Denis
|
||||
*
|
||||
* LibTomMath is a library that provides multiple-precision
|
||||
* integer arithmetic as well as number theoretic functionality.
|
||||
*
|
||||
* The library was designed directly after the MPI library by
|
||||
* Michael Fromberger but has been written from scratch with
|
||||
* additional optimizations in place.
|
||||
*
|
||||
* The library is free for all purposes without any express
|
||||
* guarantee it works.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
|
||||
/* store in unsigned [big endian] format */
|
||||
int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
|
||||
{
|
||||
if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
|
||||
return MP_VAL;
|
||||
}
|
||||
*outlen = mp_unsigned_bin_size(a);
|
||||
return mp_to_unsigned_bin(a, b);
|
||||
}
|
||||
#endif
|
||||
@@ -17,9 +17,10 @@
|
||||
|
||||
/* multiplication using the Toom-Cook 3-way algorithm
|
||||
*
|
||||
* Much more complicated than Karatsuba but has a lower asymptotic running time of
|
||||
* O(N**1.464). This algorithm is only particularly useful on VERY large
|
||||
* inputs (we're talking 1000s of digits here...).
|
||||
* Much more complicated than Karatsuba but has a lower
|
||||
* asymptotic running time of O(N**1.464). This algorithm is
|
||||
* only particularly useful on VERY large inputs
|
||||
* (we're talking 1000s of digits here...).
|
||||
*/
|
||||
int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
|
||||
{
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* get the size for an unsigned equivalent */
|
||||
int
|
||||
mp_unsigned_bin_size (mp_int * a)
|
||||
int mp_unsigned_bin_size (mp_int * a)
|
||||
{
|
||||
int size = mp_count_bits (a);
|
||||
return (size / 8 + ((size & 7) != 0 ? 1 : 0));
|
||||
|
||||
@@ -37,7 +37,7 @@ mp_xor (mp_int * a, mp_int * b, mp_int * c)
|
||||
}
|
||||
|
||||
for (ix = 0; ix < px; ix++) {
|
||||
|
||||
t.dp[ix] ^= x->dp[ix];
|
||||
}
|
||||
mp_clamp (&t);
|
||||
mp_exch (c, &t);
|
||||
|
||||
12
bn_mp_zero.c
12
bn_mp_zero.c
@@ -16,11 +16,17 @@
|
||||
*/
|
||||
|
||||
/* set to zero */
|
||||
void
|
||||
mp_zero (mp_int * a)
|
||||
void mp_zero (mp_int * a)
|
||||
{
|
||||
int n;
|
||||
mp_digit *tmp;
|
||||
|
||||
a->sign = MP_ZPOS;
|
||||
a->used = 0;
|
||||
memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
|
||||
|
||||
tmp = a->dp;
|
||||
for (n = 0; n < a->alloc; n++) {
|
||||
*tmp++ = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
|
||||
*/
|
||||
const mp_digit __prime_tab[] = {
|
||||
const mp_digit ltm_prime_tab[] = {
|
||||
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
|
||||
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
|
||||
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
|
||||
|
||||
@@ -21,11 +21,12 @@
|
||||
#define TAB_SIZE 256
|
||||
#endif
|
||||
|
||||
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
|
||||
{
|
||||
mp_int M[TAB_SIZE], res, mu;
|
||||
mp_digit buf;
|
||||
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
|
||||
int (*redux)(mp_int*,mp_int*,mp_int*);
|
||||
|
||||
/* find window size */
|
||||
x = mp_count_bits (X);
|
||||
@@ -70,11 +71,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
|
||||
/* create mu, used for Barrett reduction */
|
||||
if ((err = mp_init (&mu)) != MP_OKAY) {
|
||||
goto __M;
|
||||
}
|
||||
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_M;
|
||||
}
|
||||
|
||||
if (redmode == 0) {
|
||||
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
|
||||
goto LBL_MU;
|
||||
}
|
||||
redux = mp_reduce;
|
||||
} else {
|
||||
if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
|
||||
goto LBL_MU;
|
||||
}
|
||||
redux = mp_reduce_2k_l;
|
||||
}
|
||||
|
||||
/* create M table
|
||||
*
|
||||
@@ -85,23 +95,26 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
* computed though accept for M[0] and M[1]
|
||||
*/
|
||||
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
|
||||
/* compute the value at M[1<<(winsize-1)] by squaring
|
||||
* M[1] (winsize-1) times
|
||||
*/
|
||||
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
|
||||
for (x = 0; x < (winsize - 1); x++) {
|
||||
/* square it */
|
||||
if ((err = mp_sqr (&M[1 << (winsize - 1)],
|
||||
&M[1 << (winsize - 1)])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
|
||||
/* reduce modulo P */
|
||||
if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
|
||||
goto LBL_MU;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,16 +123,16 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
*/
|
||||
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
|
||||
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
|
||||
goto LBL_MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup result */
|
||||
if ((err = mp_init (&res)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
mp_set (&res, 1);
|
||||
|
||||
@@ -159,10 +172,10 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
/* if the bit is zero and mode == 1 then we square */
|
||||
if (mode == 1 && y == 0) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
|
||||
goto LBL_RES;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -176,19 +189,19 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
/* square first */
|
||||
for (x = 0; x < winsize; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
|
||||
goto LBL_RES;
|
||||
}
|
||||
|
||||
/* empty window and reset */
|
||||
@@ -203,20 +216,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
/* square then multiply if the bit is set */
|
||||
for (x = 0; x < bitcpy; x++) {
|
||||
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
|
||||
goto LBL_RES;
|
||||
}
|
||||
|
||||
bitbuf <<= 1;
|
||||
if ((bitbuf & (1 << winsize)) != 0) {
|
||||
/* then multiply */
|
||||
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
goto LBL_RES;
|
||||
}
|
||||
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
|
||||
goto __RES;
|
||||
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
|
||||
goto LBL_RES;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -224,9 +237,9 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
|
||||
|
||||
mp_exch (&res, Y);
|
||||
err = MP_OKAY;
|
||||
__RES:mp_clear (&res);
|
||||
__MU:mp_clear (&mu);
|
||||
__M:
|
||||
LBL_RES:mp_clear (&res);
|
||||
LBL_MU:mp_clear (&mu);
|
||||
LBL_M:
|
||||
mp_clear(&M[1]);
|
||||
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
|
||||
mp_clear (&M[x]);
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
* HAC pp. 595, Algorithm 14.12 Modified so you can control how
|
||||
* many digits of output are created.
|
||||
*/
|
||||
int
|
||||
s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
|
||||
{
|
||||
mp_int t;
|
||||
int res, pa, pb, ix, iy;
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
*/
|
||||
|
||||
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
|
||||
int
|
||||
s_mp_sqr (mp_int * a, mp_int * b)
|
||||
int s_mp_sqr (mp_int * a, mp_int * b)
|
||||
{
|
||||
mp_int t;
|
||||
int res, ix, iy, pa;
|
||||
|
||||
5
bncore.c
5
bncore.c
@@ -20,11 +20,12 @@
|
||||
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
|
||||
-------------------------------------------------------------
|
||||
Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
|
||||
AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34
|
||||
|
||||
*/
|
||||
|
||||
int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 128, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */
|
||||
KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */
|
||||
|
||||
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
|
||||
TOOM_SQR_CUTOFF = 400;
|
||||
|
||||
6081
callgraph.txt
6081
callgraph.txt
File diff suppressed because it is too large
Load Diff
32
changes.txt
32
changes.txt
@@ -1,3 +1,35 @@
|
||||
March 12th, 2005
|
||||
v0.35 -- Stupid XOR function missing line again... oops.
|
||||
-- Fixed bug in invmod not handling negative inputs correctly [Wolfgang Ehrhardt]
|
||||
-- Made exteuclid always give positive u3 output...[ Wolfgang Ehrhardt ]
|
||||
-- [Wolfgang Ehrhardt] Suggested a fix for mp_reduce() which avoided underruns. ;-)
|
||||
-- mp_rand() would emit one too many digits and it was possible to get a 0 out of it ... oops
|
||||
-- Added montgomery to the testing to make sure it handles 1..10 digit moduli correctly
|
||||
-- Fixed bug in comba that would lead to possible erroneous outputs when "pa < digs"
|
||||
-- Fixed bug in mp_toradix_size for "0" [Kevin Kenny]
|
||||
-- Updated chapters 1-5 of the textbook ;-) It now talks about the new comba code!
|
||||
|
||||
February 12th, 2005
|
||||
v0.34 -- Fixed two more small errors in mp_prime_random_ex()
|
||||
-- Fixed overflow in mp_mul_d() [Kevin Kenny]
|
||||
-- Added mp_to_(un)signed_bin_n() functions which do bounds checking for ya [and report the size]
|
||||
-- Added "large" diminished radix support. Speeds up things like DSA where the moduli is of the form 2^k - P for some P < 2^(k/2) or so
|
||||
Actually is faster than Montgomery on my AMD64 (and probably much faster on a P4)
|
||||
-- Updated the manual a bit
|
||||
-- Ok so I haven't done the textbook work yet... My current freelance gig has landed me in France till the
|
||||
end of Feb/05. Once I get back I'll have tons of free time and I plan to go to town on the book.
|
||||
As of this release the API will freeze. At least until the book catches up with all the changes. I welcome
|
||||
bug reports but new algorithms will have to wait.
|
||||
|
||||
December 23rd, 2004
|
||||
v0.33 -- Fixed "small" variant for mp_div() which would munge with negative dividends...
|
||||
-- Fixed bug in mp_prime_random_ex() which would set the most significant byte to zero when
|
||||
no special flags were set
|
||||
-- Fixed overflow [minor] bug in fast_s_mp_sqr()
|
||||
-- Made the makefiles easier to configure the group/user that ltm will install as
|
||||
-- Fixed "final carry" bug in comba multipliers. (Volkan Ceylan)
|
||||
-- Matt Johnston pointed out a missing semi-colon in mp_exptmod
|
||||
|
||||
October 29th, 2004
|
||||
v0.32 -- Added "makefile.shared" for shared object support
|
||||
-- Added more to the build options/configs in the manual
|
||||
|
||||
951
demo/demo.c
951
demo/demo.c
File diff suppressed because it is too large
Load Diff
384
demo/timing.c
384
demo/timing.c
@@ -11,15 +11,16 @@ ulong64 _tt;
|
||||
#endif
|
||||
|
||||
|
||||
void ndraw(mp_int *a, char *name)
|
||||
void ndraw(mp_int * a, char *name)
|
||||
{
|
||||
char buf[4096];
|
||||
|
||||
printf("%s: ", name);
|
||||
mp_toradix(a, buf, 64);
|
||||
printf("%s\n", buf);
|
||||
}
|
||||
|
||||
static void draw(mp_int *a)
|
||||
static void draw(mp_int * a)
|
||||
{
|
||||
ndraw(a, "");
|
||||
}
|
||||
@@ -38,40 +39,39 @@ int lbit(void)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
|
||||
/* RDTSC from Scott Duplichan */
|
||||
static ulong64 TIMFUNC (void)
|
||||
{
|
||||
#if defined __GNUC__
|
||||
#ifdef __i386__
|
||||
ulong64 a;
|
||||
__asm__ __volatile__ ("rdtsc ":"=A" (a));
|
||||
return a;
|
||||
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
while (__builtin_expect ((int) result == -1, 0))
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
return result;
|
||||
#endif
|
||||
static ulong64 TIMFUNC(void)
|
||||
{
|
||||
#if defined __GNUC__
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
unsigned long long a;
|
||||
__asm__ __volatile__("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::
|
||||
"m"(a):"%eax", "%edx");
|
||||
return a;
|
||||
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
|
||||
|
||||
while (__builtin_expect((int) result == -1, 0))
|
||||
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
|
||||
|
||||
return result;
|
||||
#endif
|
||||
|
||||
// Microsoft and Intel Windows compilers
|
||||
#elif defined _M_IX86
|
||||
__asm rdtsc
|
||||
#elif defined _M_AMD64
|
||||
return __rdtsc ();
|
||||
#elif defined _M_IA64
|
||||
#if defined __INTEL_COMPILER
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
return __getReg (3116);
|
||||
#else
|
||||
#error need rdtsc function for this build
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#define TIMFUNC clock
|
||||
#elif defined _M_IX86
|
||||
__asm rdtsc
|
||||
#elif defined _M_AMD64
|
||||
return __rdtsc();
|
||||
#elif defined _M_IA64
|
||||
#if defined __INTEL_COMPILER
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
return __getReg(3116);
|
||||
#else
|
||||
#error need rdtsc function for this build
|
||||
#endif
|
||||
}
|
||||
|
||||
#define DO(x) x; x;
|
||||
//#define DO4(x) DO2(x); DO2(x);
|
||||
@@ -81,7 +81,7 @@ static ulong64 TIMFUNC (void)
|
||||
int main(void)
|
||||
{
|
||||
ulong64 tt, gg, CLK_PER_SEC;
|
||||
FILE *log, *logb, *logc;
|
||||
FILE *log, *logb, *logc, *logd;
|
||||
mp_int a, b, c, d, e, f;
|
||||
int n, cnt, ix, old_kara_m, old_kara_s;
|
||||
unsigned rr;
|
||||
@@ -94,168 +94,191 @@ int main(void)
|
||||
mp_init(&f);
|
||||
|
||||
srand(time(NULL));
|
||||
|
||||
|
||||
/* temp. turn off TOOM */
|
||||
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
|
||||
|
||||
CLK_PER_SEC = TIMFUNC();
|
||||
sleep(1);
|
||||
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
|
||||
/* temp. turn off TOOM */
|
||||
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
|
||||
|
||||
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
|
||||
|
||||
log = fopen("logs/add.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_add(&a,&b,&c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100000);
|
||||
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
CLK_PER_SEC = TIMFUNC();
|
||||
sleep(1);
|
||||
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
|
||||
|
||||
log = fopen("logs/sub.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sub(&a,&b,&c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100000);
|
||||
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
|
||||
goto exptmod;
|
||||
log = fopen("logs/add.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_add(&a, &b, &c));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 100000);
|
||||
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
|
||||
fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
log = fopen("logs/sub.log", "w");
|
||||
for (cnt = 8; cnt <= 128; cnt += 8) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sub(&a, &b, &c));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 100000);
|
||||
|
||||
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
|
||||
fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
/* do mult/square twice, first without karatsuba and second with */
|
||||
multtest:
|
||||
old_kara_m = KARATSUBA_MUL_CUTOFF;
|
||||
old_kara_s = KARATSUBA_SQR_CUTOFF;
|
||||
for (ix = 0; ix < 1; ix++) {
|
||||
printf("With%s Karatsuba\n", (ix==0)?"out":"");
|
||||
for (ix = 0; ix < 2; ix++) {
|
||||
printf("With%s Karatsuba\n", (ix == 0) ? "out" : "");
|
||||
|
||||
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
|
||||
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
|
||||
KARATSUBA_MUL_CUTOFF = (ix == 0) ? 9999 : old_kara_m;
|
||||
KARATSUBA_SQR_CUTOFF = (ix == 0) ? 9999 : old_kara_s;
|
||||
|
||||
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
|
||||
for (cnt = 4; cnt <= 288; cnt += 2) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_mul(&a, &b, &c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
|
||||
log = fopen((ix == 0) ? "logs/mult.log" : "logs/mult_kara.log", "w");
|
||||
for (cnt = 4; cnt <= 10240 / DIGIT_BIT; cnt += 2) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
mp_rand(&b, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_mul(&a, &b, &c));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt);
|
||||
fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
|
||||
for (cnt = 4; cnt <= 288; cnt += 2) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sqr(&a, &b));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
|
||||
log = fopen((ix == 0) ? "logs/sqr.log" : "logs/sqr_kara.log", "w");
|
||||
for (cnt = 4; cnt <= 10240 / DIGIT_BIT; cnt += 2) {
|
||||
SLEEP;
|
||||
mp_rand(&a, cnt);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_sqr(&a, &b));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 100);
|
||||
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt);
|
||||
fflush(log);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
}
|
||||
exptmod:
|
||||
|
||||
{
|
||||
{
|
||||
char *primes[] = {
|
||||
/* 2K moduli mersenne primes */
|
||||
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
|
||||
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
|
||||
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
|
||||
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
|
||||
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
|
||||
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
|
||||
/* 2K large moduli */
|
||||
"179769313486231590772930519078902473361797697894230657273430081157732675805500963132708477322407536021120113879871393357658789768814416622492847430639474124377767893424865485276302219601246094119453082952085005768838150682342462881473913110540827237163350510684586239334100047359817950870678242457666208137217",
|
||||
"32317006071311007300714876688669951960444102669715484032130345427524655138867890893197201411522913463688717960921898019494119559150490921095088152386448283120630877367300996091750197750389652106796057638384067568276792218642619756161838094338476170470581645852036305042887575891541065808607552399123930385521914333389668342420684974786564569494856176035326322058077805659331026192708460314150258592864177116725943603718461857357598351152301645904403697613233287231227125684710820209725157101726931323469678542580656697935045997268352998638099733077152121140120031150424541696791951097529546801429027668869927491725169",
|
||||
"1044388881413152506691752710716624382579964249047383780384233483283953907971557456848826811934997558340890106714439262837987573438185793607263236087851365277945956976543709998340361590134383718314428070011855946226376318839397712745672334684344586617496807908705803704071284048740118609114467977783598029006686938976881787785946905630190260940599579453432823469303026696443059025015972399867714215541693835559885291486318237914434496734087811872639496475100189041349008417061675093668333850551032972088269550769983616369411933015213796825837188091833656751221318492846368125550225998300412344784862595674492194617023806505913245610825731835380087608622102834270197698202313169017678006675195485079921636419370285375124784014907159135459982790513399611551794271106831134090584272884279791554849782954323534517065223269061394905987693002122963395687782878948440616007412945674919823050571642377154816321380631045902916136926708342856440730447899971901781465763473223850267253059899795996090799469201774624817718449867455659250178329070473119433165550807568221846571746373296884912819520317457002440926616910874148385078411929804522981857338977648103126085902995208257421855249796721729039744118165938433694823325696642096892124547425283",
|
||||
/* 2K moduli mersenne primes */
|
||||
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
|
||||
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
|
||||
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
|
||||
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
|
||||
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
|
||||
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
|
||||
|
||||
/* DR moduli */
|
||||
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
|
||||
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
|
||||
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
|
||||
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
|
||||
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
|
||||
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
|
||||
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
|
||||
/* DR moduli */
|
||||
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
|
||||
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
|
||||
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
|
||||
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
|
||||
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
|
||||
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
|
||||
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
|
||||
|
||||
/* generic unrestricted moduli */
|
||||
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
|
||||
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
|
||||
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
|
||||
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
|
||||
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
|
||||
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
|
||||
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
|
||||
NULL
|
||||
/* generic unrestricted moduli */
|
||||
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
|
||||
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
|
||||
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
|
||||
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
|
||||
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
|
||||
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
|
||||
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
|
||||
NULL
|
||||
};
|
||||
log = fopen("logs/expt.log", "w");
|
||||
logb = fopen("logs/expt_dr.log", "w");
|
||||
logc = fopen("logs/expt_2k.log", "w");
|
||||
for (n = 0; primes[n]; n++) {
|
||||
SLEEP;
|
||||
mp_read_radix(&a, primes[n], 10);
|
||||
mp_zero(&b);
|
||||
for (rr = 0; rr < (unsigned)mp_count_bits(&a); rr++) {
|
||||
mp_mul_2(&b, &b);
|
||||
b.dp[0] |= lbit();
|
||||
b.used += 1;
|
||||
log = fopen("logs/expt.log", "w");
|
||||
logb = fopen("logs/expt_dr.log", "w");
|
||||
logc = fopen("logs/expt_2k.log", "w");
|
||||
logd = fopen("logs/expt_2kl.log", "w");
|
||||
for (n = 0; primes[n]; n++) {
|
||||
SLEEP;
|
||||
mp_read_radix(&a, primes[n], 10);
|
||||
mp_zero(&b);
|
||||
for (rr = 0; rr < (unsigned) mp_count_bits(&a); rr++) {
|
||||
mp_mul_2(&b, &b);
|
||||
b.dp[0] |= lbit();
|
||||
b.used += 1;
|
||||
}
|
||||
mp_sub_d(&a, 1, &c);
|
||||
mp_mod(&b, &c, &b);
|
||||
mp_set(&c, 3);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_exptmod(&c, &b, &a, &d));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 10);
|
||||
mp_sub_d(&a, 1, &e);
|
||||
mp_sub(&e, &b, &b);
|
||||
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
|
||||
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
|
||||
if (mp_cmp_d(&d, 1)) {
|
||||
printf("Different (%d)!!!\n", mp_count_bits(&a));
|
||||
draw(&d);
|
||||
exit(0);
|
||||
}
|
||||
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(n < 4 ? logd : (n < 9) ? logc : (n < 16) ? logb : log,
|
||||
"%d %9llu\n", mp_count_bits(&a), tt);
|
||||
}
|
||||
mp_sub_d(&a, 1, &c);
|
||||
mp_mod(&b, &c, &b);
|
||||
mp_set(&c, 3);
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_exptmod(&c, &b, &a, &d));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
} while (++rr < 10);
|
||||
mp_sub_d(&a, 1, &e);
|
||||
mp_sub(&e, &b, &b);
|
||||
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
|
||||
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
|
||||
if (mp_cmp_d(&d, 1)) {
|
||||
printf("Different (%d)!!!\n", mp_count_bits(&a));
|
||||
draw(&d);
|
||||
exit(0);
|
||||
}
|
||||
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), tt);
|
||||
}
|
||||
}
|
||||
fclose(log);
|
||||
fclose(logb);
|
||||
fclose(logc);
|
||||
fclose(logd);
|
||||
|
||||
log = fopen("logs/invmod.log", "w");
|
||||
for (cnt = 4; cnt <= 128; cnt += 4) {
|
||||
@@ -264,28 +287,29 @@ int main(void)
|
||||
mp_rand(&b, cnt);
|
||||
|
||||
do {
|
||||
mp_add_d(&b, 1, &b);
|
||||
mp_gcd(&a, &b, &c);
|
||||
mp_add_d(&b, 1, &b);
|
||||
mp_gcd(&a, &b, &c);
|
||||
} while (mp_cmp_d(&c, 1) != MP_EQ);
|
||||
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
rr = 0;
|
||||
tt = -1;
|
||||
do {
|
||||
gg = TIMFUNC();
|
||||
DO(mp_invmod(&b, &a, &c));
|
||||
gg = (TIMFUNC() - gg)>>1;
|
||||
if (tt > gg) tt = gg;
|
||||
gg = TIMFUNC();
|
||||
DO(mp_invmod(&b, &a, &c));
|
||||
gg = (TIMFUNC() - gg) >> 1;
|
||||
if (tt > gg)
|
||||
tt = gg;
|
||||
} while (++rr < 1000);
|
||||
mp_mulmod(&b, &c, &a, &d);
|
||||
if (mp_cmp_d(&d, 1) != MP_EQ) {
|
||||
printf("Failed to invert\n");
|
||||
return 0;
|
||||
printf("Failed to invert\n");
|
||||
return 0;
|
||||
}
|
||||
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt);
|
||||
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n",
|
||||
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
|
||||
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
|
||||
}
|
||||
fclose(log);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
2
dep.pl
2
dep.pl
@@ -13,6 +13,8 @@ print CLASS "#if !(defined(LTM1) && defined(LTM2) && defined(LTM3))\n#if defined
|
||||
foreach my $filename (glob "bn*.c") {
|
||||
my $define = $filename;
|
||||
|
||||
print "Processing $filename\n";
|
||||
|
||||
# convert filename to upper case so we can use it as a define
|
||||
$define =~ tr/[a-z]/[A-Z]/;
|
||||
$define =~ tr/\./_/;
|
||||
|
||||
@@ -18,15 +18,15 @@ is_mersenne (long s, int *pp)
|
||||
}
|
||||
|
||||
if ((res = mp_init (&u)) != MP_OKAY) {
|
||||
goto __N;
|
||||
goto LBL_N;
|
||||
}
|
||||
|
||||
/* n = 2^s - 1 */
|
||||
if ((res = mp_2expt(&n, s)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
|
||||
/* set u=4 */
|
||||
@@ -36,22 +36,22 @@ is_mersenne (long s, int *pp)
|
||||
for (k = 1; k <= s - 2; k++) {
|
||||
/* u = u^2 - 2 mod n */
|
||||
if ((res = mp_sqr (&u, &u)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
if ((res = mp_sub_d (&u, 2, &u)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
|
||||
/* make sure u is positive */
|
||||
while (u.sign == MP_NEG) {
|
||||
if ((res = mp_add (&u, &n, &u)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
}
|
||||
|
||||
/* reduce */
|
||||
if ((res = mp_reduce_2k (&u, &n, 1)) != MP_OKAY) {
|
||||
goto __MU;
|
||||
goto LBL_MU;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,8 +62,8 @@ is_mersenne (long s, int *pp)
|
||||
}
|
||||
|
||||
res = MP_OKAY;
|
||||
__MU:mp_clear (&u);
|
||||
__N:mp_clear (&n);
|
||||
LBL_MU:mp_clear (&u);
|
||||
LBL_N:mp_clear (&n);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
54
etc/pprime.c
54
etc/pprime.c
@@ -189,7 +189,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
}
|
||||
|
||||
if ((res = mp_init (&v)) != MP_OKAY) {
|
||||
goto __C;
|
||||
goto LBL_C;
|
||||
}
|
||||
|
||||
/* product of first 50 primes */
|
||||
@@ -197,34 +197,34 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
mp_read_radix (&v,
|
||||
"19078266889580195013601891820992757757219839668357012055907516904309700014933909014729740190",
|
||||
10)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&a)) != MP_OKAY) {
|
||||
goto __V;
|
||||
goto LBL_V;
|
||||
}
|
||||
|
||||
/* set the prime */
|
||||
mp_set (&a, prime_digit ());
|
||||
|
||||
if ((res = mp_init (&b)) != MP_OKAY) {
|
||||
goto __A;
|
||||
goto LBL_A;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&n)) != MP_OKAY) {
|
||||
goto __B;
|
||||
goto LBL_B;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&x)) != MP_OKAY) {
|
||||
goto __N;
|
||||
goto LBL_N;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&y)) != MP_OKAY) {
|
||||
goto __X;
|
||||
goto LBL_X;
|
||||
}
|
||||
|
||||
if ((res = mp_init (&z)) != MP_OKAY) {
|
||||
goto __Y;
|
||||
goto LBL_Y;
|
||||
}
|
||||
|
||||
/* now loop making the single digit */
|
||||
@@ -236,25 +236,25 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* now compute z = a * b * 2 */
|
||||
if ((res = mp_mul (&a, &b, &z)) != MP_OKAY) { /* z = a * b */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
if ((res = mp_copy (&z, &c)) != MP_OKAY) { /* c = a * b */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
if ((res = mp_mul_2 (&z, &z)) != MP_OKAY) { /* z = 2 * a * b */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* n = z + 1 */
|
||||
if ((res = mp_add_d (&z, 1, &n)) != MP_OKAY) { /* n = z + 1 */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* check (n, v) == 1 */
|
||||
if ((res = mp_gcd (&n, &v, &y)) != MP_OKAY) { /* y = (n, v) */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d (&y, 1) != MP_EQ)
|
||||
@@ -266,7 +266,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* compute x^a mod n */
|
||||
if ((res = mp_exptmod (&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
@@ -275,7 +275,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* now x^2a mod n */
|
||||
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d (&y, 1) == MP_EQ)
|
||||
@@ -283,7 +283,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* compute x^b mod n */
|
||||
if ((res = mp_exptmod (&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
@@ -292,7 +292,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* now x^2b mod n */
|
||||
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
if (mp_cmp_d (&y, 1) == MP_EQ)
|
||||
@@ -300,7 +300,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* compute x^c mod n == x^ab mod n */
|
||||
if ((res = mp_exptmod (&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* if y == 1 loop */
|
||||
@@ -309,7 +309,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
|
||||
/* now compute (x^c mod n)^2 */
|
||||
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
|
||||
goto __Z;
|
||||
goto LBL_Z;
|
||||
}
|
||||
|
||||
/* y should be 1 */
|
||||
@@ -346,14 +346,14 @@ pprime (int k, int li, mp_int * p, mp_int * q)
|
||||
mp_exch (&n, p);
|
||||
|
||||
res = MP_OKAY;
|
||||
__Z:mp_clear (&z);
|
||||
__Y:mp_clear (&y);
|
||||
__X:mp_clear (&x);
|
||||
__N:mp_clear (&n);
|
||||
__B:mp_clear (&b);
|
||||
__A:mp_clear (&a);
|
||||
__V:mp_clear (&v);
|
||||
__C:mp_clear (&c);
|
||||
LBL_Z:mp_clear (&z);
|
||||
LBL_Y:mp_clear (&y);
|
||||
LBL_X:mp_clear (&x);
|
||||
LBL_N:mp_clear (&n);
|
||||
LBL_B:mp_clear (&b);
|
||||
LBL_A:mp_clear (&a);
|
||||
LBL_V:mp_clear (&v);
|
||||
LBL_C:mp_clear (&c);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
37
etc/tune.c
37
etc/tune.c
@@ -10,13 +10,44 @@
|
||||
*/
|
||||
#define TIMES (1UL<<14UL)
|
||||
|
||||
/* RDTSC from Scott Duplichan */
|
||||
static ulong64 TIMFUNC (void)
|
||||
{
|
||||
#if defined __GNUC__
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
unsigned long long a;
|
||||
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
|
||||
return a;
|
||||
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
while (__builtin_expect ((int) result == -1, 0))
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
return result;
|
||||
#endif
|
||||
|
||||
// Microsoft and Intel Windows compilers
|
||||
#elif defined _M_IX86
|
||||
__asm rdtsc
|
||||
#elif defined _M_AMD64
|
||||
return __rdtsc ();
|
||||
#elif defined _M_IA64
|
||||
#if defined __INTEL_COMPILER
|
||||
#include <ia64intrin.h>
|
||||
#endif
|
||||
return __getReg (3116);
|
||||
#else
|
||||
#error need rdtsc function for this build
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifndef X86_TIMER
|
||||
|
||||
/* generic ISO C timer */
|
||||
ulong64 __T;
|
||||
void t_start(void) { __T = clock(); }
|
||||
ulong64 t_read(void) { return clock() - __T; }
|
||||
ulong64 LBL_T;
|
||||
void t_start(void) { LBL_T = TIMFUNC(); }
|
||||
ulong64 t_read(void) { return TIMFUNC() - LBL_T; }
|
||||
|
||||
#else
|
||||
extern void t_start(void);
|
||||
|
||||
32
logs/add.log
32
logs/add.log
@@ -1,16 +1,16 @@
|
||||
224 222
|
||||
448 330
|
||||
672 436
|
||||
896 520
|
||||
1120 612
|
||||
1344 696
|
||||
1568 810
|
||||
1792 912
|
||||
2016 1006
|
||||
2240 1116
|
||||
2464 1152
|
||||
2688 1284
|
||||
2912 1348
|
||||
3136 1486
|
||||
3360 1580
|
||||
3584 1636
|
||||
480 87
|
||||
960 111
|
||||
1440 135
|
||||
1920 159
|
||||
2400 200
|
||||
2880 224
|
||||
3360 248
|
||||
3840 272
|
||||
4320 296
|
||||
4800 320
|
||||
5280 344
|
||||
5760 368
|
||||
6240 392
|
||||
6720 416
|
||||
7200 440
|
||||
7680 464
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
513 1489160
|
||||
769 3688476
|
||||
1025 8162061
|
||||
2049 49260015
|
||||
2561 89579052
|
||||
3073 148797060
|
||||
4097 324449263
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
607 2272809
|
||||
1279 9557382
|
||||
2203 36250309
|
||||
3217 87666486
|
||||
4253 174168369
|
||||
|
||||
4
logs/expt_2kl.log
Normal file
4
logs/expt_2kl.log
Normal file
@@ -0,0 +1,4 @@
|
||||
1024 6954080
|
||||
2048 35993987
|
||||
4096 176068521
|
||||
521 1683720
|
||||
@@ -0,0 +1,7 @@
|
||||
532 1989592
|
||||
784 3898697
|
||||
1036 6519700
|
||||
1540 15676650
|
||||
2072 33128187
|
||||
3080 82963362
|
||||
4116 168358337
|
||||
|
||||
227
logs/mult.log
227
logs/mult.log
@@ -1,143 +1,84 @@
|
||||
140 1272
|
||||
195 1428
|
||||
252 1996
|
||||
307 2586
|
||||
364 3464
|
||||
420 4420
|
||||
476 5260
|
||||
532 6430
|
||||
588 7692
|
||||
644 8704
|
||||
699 10226
|
||||
755 11670
|
||||
812 13190
|
||||
865 14834
|
||||
924 16738
|
||||
979 18362
|
||||
1036 20660
|
||||
1092 22776
|
||||
1148 24848
|
||||
1204 27168
|
||||
1260 29930
|
||||
1316 32258
|
||||
1370 35172
|
||||
1422 37534
|
||||
1482 40390
|
||||
1537 43990
|
||||
1589 46946
|
||||
1652 50438
|
||||
1703 52902
|
||||
1764 56646
|
||||
1820 59892
|
||||
1876 63248
|
||||
1932 66872
|
||||
1988 72596
|
||||
2042 74662
|
||||
2100 78512
|
||||
2156 82944
|
||||
2211 87444
|
||||
2268 92170
|
||||
2324 95534
|
||||
2380 100484
|
||||
2435 105024
|
||||
2491 109460
|
||||
2546 114154
|
||||
2603 118946
|
||||
2660 124110
|
||||
2716 129300
|
||||
2771 134274
|
||||
2828 139594
|
||||
2883 145234
|
||||
2939 150332
|
||||
2996 155750
|
||||
3048 161718
|
||||
3108 167492
|
||||
3162 173882
|
||||
3219 179766
|
||||
3276 185560
|
||||
3330 191826
|
||||
3388 197822
|
||||
3442 204176
|
||||
3500 210682
|
||||
3556 217236
|
||||
3612 223484
|
||||
3666 230714
|
||||
3724 237744
|
||||
3779 244080
|
||||
3835 250970
|
||||
3890 257914
|
||||
3947 265162
|
||||
4001 272128
|
||||
4060 279108
|
||||
4116 287606
|
||||
4171 294716
|
||||
4227 302806
|
||||
4284 310260
|
||||
4340 318564
|
||||
4395 326164
|
||||
4443 334034
|
||||
4508 342108
|
||||
4561 351810
|
||||
4618 358828
|
||||
4675 367332
|
||||
4732 376140
|
||||
4787 384172
|
||||
4841 393308
|
||||
4899 402036
|
||||
4955 411286
|
||||
5010 420290
|
||||
5067 429688
|
||||
5124 438810
|
||||
5180 448130
|
||||
5235 457264
|
||||
5290 467390
|
||||
5348 476586
|
||||
5404 486120
|
||||
5459 496512
|
||||
5516 506624
|
||||
5569 516346
|
||||
5628 526604
|
||||
5684 536544
|
||||
5740 546936
|
||||
5796 557284
|
||||
5852 568106
|
||||
5907 578824
|
||||
5963 589204
|
||||
6019 600176
|
||||
6076 610564
|
||||
6127 621972
|
||||
6188 633564
|
||||
6244 644730
|
||||
6300 655288
|
||||
6354 667402
|
||||
6412 678824
|
||||
6467 690594
|
||||
6522 702718
|
||||
6580 714148
|
||||
6636 725608
|
||||
6690 737834
|
||||
6747 750100
|
||||
6804 762202
|
||||
6860 774184
|
||||
6916 787298
|
||||
6971 798734
|
||||
7028 811162
|
||||
7083 824570
|
||||
7139 837738
|
||||
7196 2579488
|
||||
7245 2626714
|
||||
7308 2643582
|
||||
7364 2698746
|
||||
7416 2734106
|
||||
7476 2773372
|
||||
7530 2816738
|
||||
7588 2859204
|
||||
7643 2938596
|
||||
7698 2919716
|
||||
7754 2988542
|
||||
7812 3026520
|
||||
7867 3058304
|
||||
7924 3115790
|
||||
7977 3161450
|
||||
8035 3203138
|
||||
8092 3244056
|
||||
271 555
|
||||
390 855
|
||||
508 1161
|
||||
631 1605
|
||||
749 2117
|
||||
871 2687
|
||||
991 3329
|
||||
1108 4084
|
||||
1231 4786
|
||||
1351 5624
|
||||
1470 6392
|
||||
1586 7364
|
||||
1710 8218
|
||||
1830 9255
|
||||
1951 10217
|
||||
2067 11461
|
||||
2191 12463
|
||||
2308 13677
|
||||
2430 14800
|
||||
2551 16232
|
||||
2671 17460
|
||||
2791 18899
|
||||
2902 20247
|
||||
3028 21902
|
||||
3151 23240
|
||||
3267 24927
|
||||
3391 26441
|
||||
3511 28277
|
||||
3631 29838
|
||||
3749 31751
|
||||
3869 33673
|
||||
3989 35431
|
||||
4111 37518
|
||||
4231 39426
|
||||
4349 41504
|
||||
4471 43567
|
||||
4591 45786
|
||||
4711 47876
|
||||
4831 50299
|
||||
4951 52427
|
||||
5071 54785
|
||||
5189 57241
|
||||
5307 59730
|
||||
5431 62194
|
||||
5551 64761
|
||||
5670 67322
|
||||
5789 70073
|
||||
5907 72663
|
||||
6030 75437
|
||||
6151 78242
|
||||
6268 81202
|
||||
6389 83948
|
||||
6509 86985
|
||||
6631 89903
|
||||
6747 93184
|
||||
6869 96044
|
||||
6991 99286
|
||||
7109 102395
|
||||
7229 105917
|
||||
7351 108940
|
||||
7470 112490
|
||||
7589 115702
|
||||
7711 119508
|
||||
7831 122632
|
||||
7951 126410
|
||||
8071 129808
|
||||
8190 133895
|
||||
8311 137146
|
||||
8431 141218
|
||||
8549 144732
|
||||
8667 149131
|
||||
8790 152462
|
||||
8911 156754
|
||||
9030 160479
|
||||
9149 165138
|
||||
9271 168601
|
||||
9391 173185
|
||||
9511 176988
|
||||
9627 181976
|
||||
9751 185539
|
||||
9870 190388
|
||||
9991 194335
|
||||
10110 199605
|
||||
10228 203298
|
||||
|
||||
@@ -1,33 +1,84 @@
|
||||
924 16686
|
||||
1146 25334
|
||||
1371 35304
|
||||
1591 47122
|
||||
1820 61500
|
||||
2044 75254
|
||||
2266 91732
|
||||
2492 111656
|
||||
2716 129428
|
||||
2937 147508
|
||||
3164 167758
|
||||
3388 188248
|
||||
3612 210826
|
||||
3836 233814
|
||||
4059 256898
|
||||
4284 280210
|
||||
4508 310372
|
||||
4731 333902
|
||||
4955 376502
|
||||
5179 402854
|
||||
5404 432004
|
||||
5626 459010
|
||||
5849 491868
|
||||
6076 520550
|
||||
6300 547400
|
||||
6524 575968
|
||||
6747 608482
|
||||
6971 642850
|
||||
7196 673670
|
||||
7419 710680
|
||||
7644 743942
|
||||
7868 780394
|
||||
8092 817342
|
||||
271 560
|
||||
391 870
|
||||
511 1159
|
||||
631 1605
|
||||
750 2111
|
||||
871 2737
|
||||
991 3361
|
||||
1111 4054
|
||||
1231 4778
|
||||
1351 5600
|
||||
1471 6404
|
||||
1591 7323
|
||||
1710 8255
|
||||
1831 9239
|
||||
1948 10257
|
||||
2070 11397
|
||||
2190 12531
|
||||
2308 13665
|
||||
2429 14870
|
||||
2550 16175
|
||||
2671 17539
|
||||
2787 18879
|
||||
2911 20350
|
||||
3031 21807
|
||||
3150 23415
|
||||
3270 24897
|
||||
3388 26567
|
||||
3511 28205
|
||||
3627 30076
|
||||
3751 31744
|
||||
3869 33657
|
||||
3991 35425
|
||||
4111 37522
|
||||
4229 39363
|
||||
4351 41503
|
||||
4470 43491
|
||||
4590 45827
|
||||
4711 47795
|
||||
4828 50166
|
||||
4951 52318
|
||||
5070 54911
|
||||
5191 57036
|
||||
5308 58237
|
||||
5431 60248
|
||||
5551 62678
|
||||
5671 64786
|
||||
5791 67294
|
||||
5908 69343
|
||||
6031 71607
|
||||
6151 74166
|
||||
6271 76590
|
||||
6391 78734
|
||||
6511 81175
|
||||
6631 83742
|
||||
6750 86403
|
||||
6868 88873
|
||||
6990 91150
|
||||
7110 94211
|
||||
7228 96922
|
||||
7351 99445
|
||||
7469 102216
|
||||
7589 104968
|
||||
7711 108113
|
||||
7827 110758
|
||||
7950 113714
|
||||
8071 116511
|
||||
8186 119643
|
||||
8310 122679
|
||||
8425 125581
|
||||
8551 128715
|
||||
8669 131778
|
||||
8788 135116
|
||||
8910 138138
|
||||
9031 141628
|
||||
9148 144754
|
||||
9268 148367
|
||||
9391 151551
|
||||
9511 155033
|
||||
9631 158652
|
||||
9751 162125
|
||||
9871 165248
|
||||
9988 168627
|
||||
10111 172427
|
||||
10231 176412
|
||||
|
||||
227
logs/sqr.log
227
logs/sqr.log
@@ -1,143 +1,84 @@
|
||||
139 806
|
||||
195 1212
|
||||
252 1604
|
||||
307 2260
|
||||
364 2892
|
||||
420 3308
|
||||
476 4152
|
||||
532 4814
|
||||
588 5754
|
||||
644 6684
|
||||
700 7226
|
||||
756 8324
|
||||
808 9092
|
||||
866 10068
|
||||
924 11204
|
||||
976 12918
|
||||
1036 13656
|
||||
1092 15248
|
||||
1148 15956
|
||||
1204 17270
|
||||
1260 19894
|
||||
1316 20516
|
||||
1370 21864
|
||||
1428 25554
|
||||
1483 26138
|
||||
1540 27086
|
||||
1596 29246
|
||||
1652 32210
|
||||
1707 32704
|
||||
1764 35142
|
||||
1820 39050
|
||||
1876 39256
|
||||
1931 41574
|
||||
1985 45070
|
||||
2044 46352
|
||||
2099 48114
|
||||
2155 51332
|
||||
2212 53268
|
||||
2267 55890
|
||||
2324 59054
|
||||
2380 60206
|
||||
2434 63540
|
||||
2491 66084
|
||||
2547 68590
|
||||
2604 74332
|
||||
2660 74784
|
||||
2715 77974
|
||||
2772 79924
|
||||
2826 82914
|
||||
2884 87210
|
||||
2929 89076
|
||||
2996 92480
|
||||
3052 96814
|
||||
3108 99990
|
||||
3162 102550
|
||||
3219 105396
|
||||
3276 109284
|
||||
3332 113752
|
||||
3387 116628
|
||||
3444 120782
|
||||
3500 122938
|
||||
3556 127940
|
||||
3612 303656
|
||||
3667 312212
|
||||
3724 324376
|
||||
3779 329204
|
||||
3833 340910
|
||||
3892 353850
|
||||
3943 362348
|
||||
4003 367780
|
||||
4056 380448
|
||||
4114 393616
|
||||
4172 404104
|
||||
4227 415148
|
||||
4284 409770
|
||||
4339 436648
|
||||
4394 442970
|
||||
4451 463096
|
||||
4507 472056
|
||||
4564 485780
|
||||
4616 496286
|
||||
4675 507612
|
||||
4732 519524
|
||||
4788 536768
|
||||
4843 542754
|
||||
4899 553090
|
||||
4956 571986
|
||||
5012 586340
|
||||
5068 599606
|
||||
5124 613670
|
||||
5179 624256
|
||||
5235 636266
|
||||
5292 655518
|
||||
5348 668142
|
||||
5403 677266
|
||||
5460 696040
|
||||
5516 712772
|
||||
5570 723942
|
||||
5628 739052
|
||||
5684 755350
|
||||
5739 769962
|
||||
5790 775258
|
||||
5851 790128
|
||||
5908 814536
|
||||
5962 827278
|
||||
6018 844510
|
||||
6076 851606
|
||||
6130 865748
|
||||
6188 894752
|
||||
6244 900474
|
||||
6300 928174
|
||||
6356 928440
|
||||
6410 957758
|
||||
6468 981134
|
||||
6524 994088
|
||||
6580 1011124
|
||||
6636 1027178
|
||||
6692 1045466
|
||||
6747 1056910
|
||||
6804 1083784
|
||||
6860 1104706
|
||||
6915 1116450
|
||||
6972 1137894
|
||||
7028 1154670
|
||||
7084 1158064
|
||||
7138 1188734
|
||||
7196 1214218
|
||||
7249 1226822
|
||||
7307 1247528
|
||||
7363 1255338
|
||||
7420 1291104
|
||||
7475 1297940
|
||||
7532 1324994
|
||||
7587 1340274
|
||||
7644 1342596
|
||||
7698 1381418
|
||||
7756 1382904
|
||||
7812 1432588
|
||||
7867 1443632
|
||||
7922 1465092
|
||||
7979 1496804
|
||||
8036 1520142
|
||||
8092 1539566
|
||||
265 562
|
||||
389 882
|
||||
509 1207
|
||||
631 1572
|
||||
750 1990
|
||||
859 2433
|
||||
991 2894
|
||||
1109 3555
|
||||
1230 4228
|
||||
1350 5018
|
||||
1471 5805
|
||||
1591 6579
|
||||
1709 7415
|
||||
1829 8329
|
||||
1949 9225
|
||||
2071 10139
|
||||
2188 11239
|
||||
2309 12178
|
||||
2431 13212
|
||||
2551 14294
|
||||
2671 15551
|
||||
2791 16512
|
||||
2911 17718
|
||||
3030 18876
|
||||
3150 20259
|
||||
3270 21374
|
||||
3391 22650
|
||||
3511 23948
|
||||
3631 25493
|
||||
3750 26756
|
||||
3870 28225
|
||||
3989 29705
|
||||
4110 31409
|
||||
4230 32834
|
||||
4351 34327
|
||||
4471 35818
|
||||
4591 37636
|
||||
4711 39228
|
||||
4830 40868
|
||||
4949 42393
|
||||
5070 44541
|
||||
5191 46269
|
||||
5310 48162
|
||||
5429 49728
|
||||
5548 51985
|
||||
5671 53948
|
||||
5791 55885
|
||||
5910 57584
|
||||
6031 60082
|
||||
6150 62239
|
||||
6270 64309
|
||||
6390 66014
|
||||
6511 68766
|
||||
6631 71012
|
||||
6750 73172
|
||||
6871 74952
|
||||
6991 77909
|
||||
7111 80371
|
||||
7231 82666
|
||||
7351 84531
|
||||
7469 87698
|
||||
7589 90318
|
||||
7711 225384
|
||||
7830 232428
|
||||
7950 240009
|
||||
8070 246522
|
||||
8190 253662
|
||||
8310 260961
|
||||
8431 269253
|
||||
8549 275743
|
||||
8671 283769
|
||||
8789 290811
|
||||
8911 300034
|
||||
9030 306873
|
||||
9149 315085
|
||||
9270 323944
|
||||
9390 332390
|
||||
9508 337519
|
||||
9631 348986
|
||||
9749 356904
|
||||
9871 367013
|
||||
9989 373831
|
||||
10108 381033
|
||||
10230 393475
|
||||
|
||||
@@ -1,33 +1,84 @@
|
||||
922 11272
|
||||
1148 16004
|
||||
1370 21958
|
||||
1596 28684
|
||||
1817 37832
|
||||
2044 46386
|
||||
2262 56218
|
||||
2492 66388
|
||||
2716 77478
|
||||
2940 89380
|
||||
3163 103680
|
||||
3385 116274
|
||||
3612 135334
|
||||
3836 151332
|
||||
4057 164938
|
||||
4284 183178
|
||||
4508 198864
|
||||
4731 215222
|
||||
4954 231986
|
||||
5180 251660
|
||||
5404 269414
|
||||
5626 288454
|
||||
5850 307806
|
||||
6076 329458
|
||||
6299 347726
|
||||
6523 369864
|
||||
6748 387832
|
||||
6971 413010
|
||||
7194 453310
|
||||
7415 476936
|
||||
7643 497118
|
||||
7867 521394
|
||||
8091 540224
|
||||
271 560
|
||||
388 878
|
||||
511 1179
|
||||
629 1625
|
||||
751 1988
|
||||
871 2423
|
||||
989 2896
|
||||
1111 3561
|
||||
1231 4209
|
||||
1350 5015
|
||||
1470 5804
|
||||
1591 6556
|
||||
1709 7420
|
||||
1831 8263
|
||||
1951 9173
|
||||
2070 10153
|
||||
2191 11229
|
||||
2310 12167
|
||||
2431 13211
|
||||
2550 14309
|
||||
2671 15524
|
||||
2788 16525
|
||||
2910 17712
|
||||
3028 18822
|
||||
3148 20220
|
||||
3271 21343
|
||||
3391 22652
|
||||
3511 23944
|
||||
3630 25485
|
||||
3750 26778
|
||||
3868 28201
|
||||
3990 29653
|
||||
4111 31393
|
||||
4225 32841
|
||||
4350 34328
|
||||
4471 35786
|
||||
4590 37652
|
||||
4711 39245
|
||||
4830 40876
|
||||
4951 42433
|
||||
5068 44547
|
||||
5191 46321
|
||||
5311 48140
|
||||
5430 49727
|
||||
5550 52034
|
||||
5671 53954
|
||||
5791 55921
|
||||
5908 57597
|
||||
6031 60084
|
||||
6148 62226
|
||||
6270 64295
|
||||
6390 66045
|
||||
6511 68779
|
||||
6629 71003
|
||||
6751 73169
|
||||
6871 74992
|
||||
6991 77895
|
||||
7110 80376
|
||||
7231 82628
|
||||
7351 84468
|
||||
7470 87664
|
||||
7591 90284
|
||||
7711 91352
|
||||
7828 93995
|
||||
7950 96276
|
||||
8071 98691
|
||||
8190 101256
|
||||
8308 103631
|
||||
8431 105222
|
||||
8550 108343
|
||||
8671 110281
|
||||
8787 112764
|
||||
8911 115397
|
||||
9031 117690
|
||||
9151 120266
|
||||
9271 122715
|
||||
9391 124624
|
||||
9510 127937
|
||||
9630 130313
|
||||
9750 132914
|
||||
9871 136129
|
||||
9991 138517
|
||||
10108 141525
|
||||
10231 144225
|
||||
|
||||
32
logs/sub.log
32
logs/sub.log
@@ -1,16 +1,16 @@
|
||||
224 216
|
||||
448 324
|
||||
672 428
|
||||
896 532
|
||||
1120 648
|
||||
1344 766
|
||||
1568 862
|
||||
1792 928
|
||||
2016 1070
|
||||
2240 1128
|
||||
2464 1250
|
||||
2688 1344
|
||||
2912 1436
|
||||
3136 1542
|
||||
3360 1628
|
||||
3584 1696
|
||||
480 94
|
||||
960 116
|
||||
1440 140
|
||||
1920 164
|
||||
2400 205
|
||||
2880 229
|
||||
3360 253
|
||||
3840 277
|
||||
4320 299
|
||||
4800 321
|
||||
5280 345
|
||||
5760 371
|
||||
6240 395
|
||||
6720 419
|
||||
7200 441
|
||||
7680 465
|
||||
|
||||
@@ -27,11 +27,13 @@ bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj \
|
||||
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
|
||||
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
|
||||
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
|
||||
bn_mp_reduce_2k_l.obj bn_mp_reduce_is_2k_l.obj bn_mp_reduce_2k_setup_l.obj \
|
||||
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
|
||||
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
|
||||
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj \
|
||||
bn_mp_to_signed_bin_n.obj bn_mp_to_unsigned_bin_n.obj
|
||||
|
||||
TARGET = libtommath.lib
|
||||
|
||||
|
||||
@@ -32,11 +32,13 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
|
||||
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
|
||||
|
||||
# make a Windows DLL via Cygwin
|
||||
windll: $(OBJECTS)
|
||||
|
||||
16
makefile.icc
16
makefile.icc
@@ -21,6 +21,10 @@ CFLAGS += -I./
|
||||
# Default to just generic max opts
|
||||
CFLAGS += -O3 -xN
|
||||
|
||||
#install as this user
|
||||
USER=root
|
||||
GROUP=root
|
||||
|
||||
default: libtommath.a
|
||||
|
||||
#default files to install
|
||||
@@ -55,11 +59,13 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
|
||||
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
|
||||
|
||||
libtommath.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
|
||||
@@ -89,10 +95,10 @@ profiled_single:
|
||||
ranlib libtommath.a
|
||||
|
||||
install: libtommath.a
|
||||
install -d -g root -o root $(DESTDIR)$(LIBPATH)
|
||||
install -d -g root -o root $(DESTDIR)$(INCPATH)
|
||||
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
|
||||
test: libtommath.a demo/demo.o
|
||||
$(CC) demo/demo.o libtommath.a -o test
|
||||
|
||||
@@ -26,11 +26,13 @@ bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj \
|
||||
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
|
||||
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
|
||||
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
|
||||
bn_mp_reduce_2k_l.obj bn_mp_reduce_is_2k_l.obj bn_mp_reduce_2k_setup_l.obj \
|
||||
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
|
||||
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
|
||||
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
|
||||
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj
|
||||
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj \
|
||||
bn_mp_to_signed_bin_n.obj bn_mp_to_unsigned_bin_n.obj
|
||||
|
||||
library: $(OBJECTS)
|
||||
lib /out:tommath.lib $(OBJECTS)
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
#Makefile for GCC
|
||||
#
|
||||
#Tom St Denis
|
||||
VERSION=0:32
|
||||
VERSION=0:35
|
||||
|
||||
CC = libtool --mode=compile gcc
|
||||
|
||||
CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare
|
||||
|
||||
#for speed
|
||||
@@ -16,11 +15,15 @@ CFLAGS += -O3 -funroll-loops
|
||||
#x86 optimizations [should be valid for any GCC install though]
|
||||
CFLAGS += -fomit-frame-pointer
|
||||
|
||||
#install as this user
|
||||
USER=root
|
||||
GROUP=root
|
||||
|
||||
default: libtommath.la
|
||||
|
||||
#default files to install
|
||||
LIBNAME=libtommath.la
|
||||
HEADERS=tommath.h
|
||||
HEADERS=tommath.h tommath_class.h tommath_superclass.h
|
||||
|
||||
#LIBPATH-The directory for libtommath to be installed to.
|
||||
#INCPATH-The directory to install the header files for libtommath.
|
||||
@@ -50,18 +53,21 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
|
||||
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
|
||||
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
|
||||
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
|
||||
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
|
||||
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
|
||||
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
|
||||
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
|
||||
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
|
||||
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
|
||||
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
|
||||
|
||||
|
||||
libtommath.la: $(OBJECTS)
|
||||
libtool --mode=link gcc *.lo -o libtommath.la -rpath $(LIBPATH) -version-info $(VERSION)
|
||||
libtool --mode=link gcc *.o -o libtommath.a
|
||||
libtool --mode=install install -c libtommath.la $(LIBPATH)/libtommath.la
|
||||
install -d -g root -o root $(DESTDIR)$(INCPATH)
|
||||
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
|
||||
test: libtommath.a demo/demo.o
|
||||
gcc $(CFLAGS) -c demo/demo.c -o demo/demo.o
|
||||
|
||||
@@ -46,7 +46,7 @@ void rand_num(mp_int *a)
|
||||
int n, size;
|
||||
unsigned char buf[2048];
|
||||
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 1031;
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 101;
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
while (buf[1] == 0) buf[1] = fgetc(rng);
|
||||
@@ -58,7 +58,7 @@ void rand_num2(mp_int *a)
|
||||
int n, size;
|
||||
unsigned char buf[2048];
|
||||
|
||||
size = 10 + ((fgetc(rng)<<8) + fgetc(rng)) % 97;
|
||||
size = 10 + ((fgetc(rng)<<8) + fgetc(rng)) % 101;
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
while (buf[1] == 0) buf[1] = fgetc(rng);
|
||||
|
||||
831
pre_gen/mpi.c
831
pre_gen/mpi.c
File diff suppressed because it is too large
Load Diff
35
tombc/grammar.txt
Normal file
35
tombc/grammar.txt
Normal file
@@ -0,0 +1,35 @@
|
||||
program := program statement | statement | empty
|
||||
statement := { statement } |
|
||||
identifier = numexpression; |
|
||||
identifier[numexpression] = numexpression; |
|
||||
function(expressionlist); |
|
||||
for (identifer = numexpression; numexpression; identifier = numexpression) { statement } |
|
||||
while (numexpression) { statement } |
|
||||
if (numexpresion) { statement } elif |
|
||||
break; |
|
||||
continue;
|
||||
|
||||
elif := else statement | empty
|
||||
function := abs | countbits | exptmod | jacobi | print | isprime | nextprime | issquare | readinteger | exit
|
||||
expressionlist := expressionlist, expression | expression
|
||||
|
||||
// LR(1) !!!?
|
||||
expression := string | numexpression
|
||||
numexpression := cmpexpr && cmpexpr | cmpexpr \|\| cmpexpr | cmpexpr
|
||||
cmpexpr := boolexpr < boolexpr | boolexpr > boolexpr | boolexpr == boolexpr |
|
||||
boolexpr <= boolexpr | boolexpr >= boolexpr | boolexpr
|
||||
boolexpr := shiftexpr & shiftexpr | shiftexpr ^ shiftexpr | shiftexpr \| shiftexpr | shiftexpr
|
||||
shiftexpr := addsubexpr << addsubexpr | addsubexpr >> addsubexpr | addsubexpr
|
||||
addsubexpr := mulexpr + mulexpr | mulexpr - mulexpr | mulexpr
|
||||
mulexpr := expr * expr | expr / expr | expr % expr | expr
|
||||
expr := -nexpr | nexpr
|
||||
nexpr := integer | identifier | ( numexpression ) | identifier[numexpression]
|
||||
|
||||
identifier := identifer digits | identifier alpha | alpha
|
||||
alpha := a ... z | A ... Z
|
||||
integer := hexnumber | digits
|
||||
hexnumber := 0xhexdigits
|
||||
hexdigits := hexdigits hexdigit | hexdigit
|
||||
hexdigit := 0 ... 9 | a ... f | A ... F
|
||||
digits := digits digit | digit
|
||||
digit := 0 ... 9
|
||||
17
tommath.h
17
tommath.h
@@ -429,6 +429,15 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d);
|
||||
/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
|
||||
int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d);
|
||||
|
||||
/* returns true if a can be reduced with mp_reduce_2k_l */
|
||||
int mp_reduce_is_2k_l(mp_int *a);
|
||||
|
||||
/* determines k value for 2k reduction */
|
||||
int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
|
||||
|
||||
/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
|
||||
int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
|
||||
|
||||
/* d = a**b (mod c) */
|
||||
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
|
||||
|
||||
@@ -442,7 +451,7 @@ int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
|
||||
#endif
|
||||
|
||||
/* table of first PRIME_SIZE primes */
|
||||
extern const mp_digit __prime_tab[];
|
||||
extern const mp_digit ltm_prime_tab[];
|
||||
|
||||
/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
|
||||
int mp_prime_is_divisible(mp_int *a, int *result);
|
||||
@@ -511,12 +520,14 @@ int mp_count_bits(mp_int *a);
|
||||
int mp_unsigned_bin_size(mp_int *a);
|
||||
int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c);
|
||||
int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
|
||||
int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
|
||||
|
||||
int mp_signed_bin_size(mp_int *a);
|
||||
int mp_read_signed_bin(mp_int *a, unsigned char *b, int c);
|
||||
int mp_to_signed_bin(mp_int *a, unsigned char *b);
|
||||
int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
|
||||
|
||||
int mp_read_radix(mp_int *a, char *str, int radix);
|
||||
int mp_read_radix(mp_int *a, const char *str, int radix);
|
||||
int mp_toradix(mp_int *a, char *str, int radix);
|
||||
int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen);
|
||||
int mp_radix_size(mp_int *a, int radix, int *size);
|
||||
@@ -554,7 +565,7 @@ int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c);
|
||||
int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c);
|
||||
int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
|
||||
int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y, int mode);
|
||||
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
|
||||
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int mode);
|
||||
void bn_reverse(unsigned char *s, int len);
|
||||
|
||||
extern const char *mp_s_rmap;
|
||||
|
||||
491
tommath.src
491
tommath.src
@@ -49,7 +49,7 @@
|
||||
\begin{document}
|
||||
\frontmatter
|
||||
\pagestyle{empty}
|
||||
\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Draft Edition }
|
||||
\title{Multi--Precision Math}
|
||||
\author{\mbox{
|
||||
%\begin{small}
|
||||
\begin{tabular}{c}
|
||||
@@ -66,7 +66,7 @@ QUALCOMM Australia \\
|
||||
}
|
||||
}
|
||||
\maketitle
|
||||
This text has been placed in the public domain. This text corresponds to the v0.30 release of the
|
||||
This text has been placed in the public domain. This text corresponds to the v0.35 release of the
|
||||
LibTomMath project.
|
||||
|
||||
\begin{alltt}
|
||||
@@ -85,66 +85,32 @@ This text is formatted to the international B5 paper size of 176mm wide by 250mm
|
||||
|
||||
\tableofcontents
|
||||
\listoffigures
|
||||
\chapter*{Prefaces to the Draft Edition}
|
||||
I started this text in April 2003 to complement my LibTomMath library. That is, explain how to implement the functions
|
||||
contained in LibTomMath. The goal is to have a textbook that any Computer Science student can use when implementing their
|
||||
own multiple precision arithmetic. The plan I wanted to follow was flesh out all the
|
||||
ideas and concepts I had floating around in my head and then work on it afterwards refining a little bit at a time. Chance
|
||||
would have it that I ended up with my summer off from Algonquin College and I was given four months solid to work on the
|
||||
text.
|
||||
\chapter*{Prefaces}
|
||||
When I tell people about my LibTom projects and that I release them as public domain they are often puzzled.
|
||||
They ask why I did it and especially why I continue to work on them for free. The best I can explain it is ``Because I can.''
|
||||
Which seems odd and perhaps too terse for adult conversation. I often qualify it with ``I am able, I am willing.'' which
|
||||
perhaps explains it better. I am the first to admit there is not anything that special with what I have done. Perhaps
|
||||
others can see that too and then we would have a society to be proud of. My LibTom projects are what I am doing to give
|
||||
back to society in the form of tools and knowledge that can help others in their endeavours.
|
||||
|
||||
Choosing to not waste any time I dove right into the project even before my spring semester was finished. I wrote a bit
|
||||
off and on at first. The moment my exams were finished I jumped into long 12 to 16 hour days. The result after only
|
||||
a couple of months was a ten chapter, three hundred page draft that I quickly had distributed to anyone who wanted
|
||||
to read it. I had Jean-Luc Cooke print copies for me and I brought them to Crypto'03 in Santa Barbara. So far I have
|
||||
managed to grab a certain level of attention having people from around the world ask me for copies of the text was certain
|
||||
rewarding.
|
||||
I started writing this book because it was the most logical task to further my goal of open academia. The LibTomMath source
|
||||
code itself was written to be easy to follow and learn from. There are times, however, where pure C source code does not
|
||||
explain the algorithms properly. Hence this book. The book literally starts with the foundation of the library and works
|
||||
itself outwards to the more complicated algorithms. The use of both pseudo--code and verbatim source code provides a duality
|
||||
of ``theory'' and ``practice'' that the computer science students of the world shall appreciate. I never deviate too far
|
||||
from relatively straightforward algebra and I hope that this book can be a valuable learning asset.
|
||||
|
||||
Now we are past December 2003. By this time I had pictured that I would have at least finished my second draft of the text.
|
||||
Currently I am far off from this goal. I've done partial re-writes of chapters one, two and three but they are not even
|
||||
finished yet. I haven't given up on the project, only had some setbacks. First O'Reilly declined to publish the text then
|
||||
Addison-Wesley and Greg is tried another which I don't know the name of. However, at this point I want to focus my energy
|
||||
onto finishing the book not securing a contract.
|
||||
This book and indeed much of the LibTom projects would not exist in their current form if it was not for a plethora
|
||||
of kind people donating their time, resources and kind words to help support my work. Writing a text of significant
|
||||
length (along with the source code) is a tiresome and lengthy process. Currently the LibTom project is four years old,
|
||||
comprises of literally thousands of users and over 100,000 lines of source code, TeX and other material. People like Mads and Greg
|
||||
were there at the beginning to encourage me to work well. It is amazing how timely validation from others can boost morale to
|
||||
continue the project. Definitely my parents were there for me by providing room and board during the many months of work in 2003.
|
||||
|
||||
So why am I writing this text? It seems like a lot of work right? Most certainly it is a lot of work writing a textbook.
|
||||
Even the simplest introductory material has to be lined with references and figures. A lot of the text has to be re-written
|
||||
from point form to prose form to ensure an easier read. Why am I doing all this work for free then? Simple. My philosophy
|
||||
is quite simply ``Open Source. Open Academia. Open Minds'' which means that to achieve a goal of open minds, that is,
|
||||
people willing to accept new ideas and explore the unknown you have to make available material they can access freely
|
||||
without hinderance.
|
||||
To my many friends whom I have met through the years I thank you for the good times and the words of encouragement. I hope I
|
||||
honour your kind gestures with this project.
|
||||
|
||||
I've been writing free software since I was about sixteen but only recently have I hit upon software that people have come
|
||||
to depend upon. I started LibTomCrypt in December 2001 and now several major companies use it as integral portions of their
|
||||
software. Several educational institutions use it as a matter of course and many freelance developers use it as
|
||||
part of their projects. To further my contributions I started the LibTomMath project in December 2002 aimed at providing
|
||||
multiple precision arithmetic routines that students could learn from. That is write routines that are not only easy
|
||||
to understand and follow but provide quite impressive performance considering they are all in standard portable ISO C.
|
||||
|
||||
The second leg of my philosophy is ``Open Academia'' which is where this textbook comes in. In the end, when all is
|
||||
said and done the text will be useable by educational institutions as a reference on multiple precision arithmetic.
|
||||
|
||||
At this time I feel I should share a little information about myself. The most common question I was asked at
|
||||
Crypto'03, perhaps just out of professional courtesy, was which school I either taught at or attended. The unfortunate
|
||||
truth is that I neither teach at or attend a school of academic reputation. I'm currently at Algonquin College which
|
||||
is what I'd like to call ``somewhat academic but mostly vocational'' college. In otherwords, job training.
|
||||
|
||||
I'm a 21 year old computer science student mostly self-taught in the areas I am aware of (which includes a half-dozen
|
||||
computer science fields, a few fields of mathematics and some English). I look forward to teaching someday but I am
|
||||
still far off from that goal.
|
||||
|
||||
Now it would be improper for me to not introduce the rest of the texts co-authors. While they are only contributing
|
||||
corrections and editorial feedback their support has been tremendously helpful in presenting the concepts laid out
|
||||
in the text so far. Greg has always been there for me. He has tracked my LibTom projects since their inception and even
|
||||
sent cheques to help pay tuition from time to time. His background has provided a wonderful source to bounce ideas off
|
||||
of and improve the quality of my writing. Mads is another fellow who has just ``been there''. I don't even recall what
|
||||
his interest in the LibTom projects is but I'm definitely glad he has been around. His ability to catch logical errors
|
||||
in my written English have saved me on several occasions to say the least.
|
||||
|
||||
What to expect next? Well this is still a rough draft. I've only had the chance to update a few chapters. However, I've
|
||||
been getting the feeling that people are starting to use my text and I owe them some updated material. My current tenative
|
||||
plan is to edit one chapter every two weeks starting January 4th. It seems insane but my lower course load at college
|
||||
should provide ample time. By Crypto'04 I plan to have a 2nd draft of the text polished and ready to hand out to as many
|
||||
people who will take it.
|
||||
Open Source. Open Academia. Open Minds.
|
||||
|
||||
\begin{flushright} Tom St Denis \end{flushright}
|
||||
|
||||
@@ -937,7 +903,7 @@ assumed to contain undefined values they are initially set to zero.
|
||||
|
||||
EXAM,bn_mp_grow.c
|
||||
|
||||
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @23,if@) checks
|
||||
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @24,alloc@) checks
|
||||
if the \textbf{alloc} member of the mp\_int is smaller than the requested digit count. If the count is not larger than \textbf{alloc}
|
||||
the function skips the re-allocation part thus saving time.
|
||||
|
||||
@@ -1310,7 +1276,7 @@ After the function is completed, all of the digits are zeroed, the \textbf{used}
|
||||
With the mp\_int representation of an integer, calculating the absolute value is trivial. The mp\_abs algorithm will compute
|
||||
the absolute value of an mp\_int.
|
||||
|
||||
\newpage\begin{figure}[here]
|
||||
\begin{figure}[here]
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
\hline Algorithm \textbf{mp\_abs}. \\
|
||||
@@ -1335,6 +1301,9 @@ logic to handle it.
|
||||
|
||||
EXAM,bn_mp_abs.c
|
||||
|
||||
This fairly trivial algorithm first eliminates non--required duplications (line @27,a != b@) and then sets the
|
||||
\textbf{sign} flag to \textbf{MP\_ZPOS}.
|
||||
|
||||
\subsection{Integer Negation}
|
||||
With the mp\_int representation of an integer, calculating the negation is also trivial. The mp\_neg algorithm will compute
|
||||
the negative of an mp\_int input.
|
||||
@@ -1368,11 +1337,15 @@ zero as negative.
|
||||
|
||||
EXAM,bn_mp_neg.c
|
||||
|
||||
Like mp\_abs() this function avoids non--required duplications (line @21,a != b@) and then sets the sign. We
|
||||
have to make sure that only non--zero values get a \textbf{sign} of \textbf{MP\_NEG}. If the mp\_int is zero
|
||||
than the \textbf{sign} is hard--coded to \textbf{MP\_ZPOS}.
|
||||
|
||||
\section{Small Constants}
|
||||
\subsection{Setting Small Constants}
|
||||
Often a mp\_int must be set to a relatively small value such as $1$ or $2$. For these cases the mp\_set algorithm is useful.
|
||||
|
||||
\begin{figure}[here]
|
||||
\newpage\begin{figure}[here]
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
\hline Algorithm \textbf{mp\_set}. \\
|
||||
@@ -1397,11 +1370,14 @@ single digit is set (\textit{modulo $\beta$}) and the \textbf{used} count is adj
|
||||
|
||||
EXAM,bn_mp_set.c
|
||||
|
||||
Line @21,mp_zero@ calls mp\_zero() to clear the mp\_int and reset the sign. Line @22,MP_MASK@ copies the digit
|
||||
into the least significant location. Note the usage of a new constant \textbf{MP\_MASK}. This constant is used to quickly
|
||||
reduce an integer modulo $\beta$. Since $\beta$ is of the form $2^k$ for any suitable $k$ it suffices to perform a binary AND with
|
||||
$MP\_MASK = 2^k - 1$ to perform the reduction. Finally line @23,a->used@ will set the \textbf{used} member with respect to the
|
||||
digit actually set. This function will always make the integer positive.
|
||||
First we zero (line @21,mp_zero@) the mp\_int to make sure that the other members are initialized for a
|
||||
small positive constant. mp\_zero() ensures that the \textbf{sign} is positive and the \textbf{used} count
|
||||
is zero. Next we set the digit and reduce it modulo $\beta$ (line @22,MP_MASK@). After this step we have to
|
||||
check if the resulting digit is zero or not. If it is not then we set the \textbf{used} count to one, otherwise
|
||||
to zero.
|
||||
|
||||
We can quickly reduce modulo $\beta$ since it is of the form $2^k$ and a quick binary AND operation with
|
||||
$2^k - 1$ will perform the same operation.
|
||||
|
||||
One important limitation of this function is that it will only set one digit. The size of a digit is not fixed, meaning source that uses
|
||||
this function should take that into account. Only trivially small constants can be set using this function.
|
||||
@@ -1503,10 +1479,12 @@ the zero'th digit. If after all of the digits have been compared, no difference
|
||||
|
||||
EXAM,bn_mp_cmp_mag.c
|
||||
|
||||
The two if statements on lines @24,if@ and @28,if@ compare the number of digits in the two inputs. These two are performed before all of the digits
|
||||
are compared since it is a very cheap test to perform and can potentially save considerable time. The implementation given is also not valid
|
||||
without those two statements. $b.alloc$ may be smaller than $a.used$, meaning that undefined values will be read from $b$ past the end of the
|
||||
array of digits.
|
||||
The two if statements (lines @24,if@ and @28,if@) compare the number of digits in the two inputs. These two are
|
||||
performed before all of the digits are compared since it is a very cheap test to perform and can potentially save
|
||||
considerable time. The implementation given is also not valid without those two statements. $b.alloc$ may be
|
||||
smaller than $a.used$, meaning that undefined values will be read from $b$ past the end of the array of digits.
|
||||
|
||||
|
||||
|
||||
\subsection{Signed Comparisons}
|
||||
Comparing with sign considerations is also fairly critical in several routines (\textit{division for example}). Based on an unsigned magnitude
|
||||
@@ -1539,9 +1517,9 @@ $\vert a \vert < \vert b \vert$. Step number four will compare the two when the
|
||||
|
||||
EXAM,bn_mp_cmp.c
|
||||
|
||||
The two if statements on lines @22,if@ and @26,if@ perform the initial sign comparison. If the signs are not the equal then which ever
|
||||
has the positive sign is larger. At line @30,if@, the inputs are compared based on magnitudes. If the signs were both negative then
|
||||
the unsigned comparison is performed in the opposite direction (\textit{line @31,mp_cmp_mag@}). Otherwise, the signs are assumed to
|
||||
The two if statements (lines @22,if@ and @26,if@) perform the initial sign comparison. If the signs are not the equal then which ever
|
||||
has the positive sign is larger. The inputs are compared (line @30,if@) based on magnitudes. If the signs were both
|
||||
negative then the unsigned comparison is performed in the opposite direction (line @31,mp_cmp_mag@). Otherwise, the signs are assumed to
|
||||
be both positive and a forward direction unsigned comparison is performed.
|
||||
|
||||
\section*{Exercises}
|
||||
@@ -1664,19 +1642,21 @@ The final carry is stored in $c_{max}$ and digits above $max$ upto $oldused$ are
|
||||
|
||||
EXAM,bn_s_mp_add.c
|
||||
|
||||
Lines @27,if@ to @35,}@ perform the initial sorting of the inputs and determine the $min$ and $max$ variables. Note that $x$ is a pointer to a
|
||||
mp\_int assigned to the largest input, in effect it is a local alias. Lines @37,init@ to @42,}@ ensure that the destination is grown to
|
||||
accomodate the result of the addition.
|
||||
We first sort (lines @27,if@ to @35,}@) the inputs based on magnitude and determine the $min$ and $max$ variables.
|
||||
Note that $x$ is a pointer to an mp\_int assigned to the largest input, in effect it is a local alias. Next we
|
||||
grow the destination (@37,init@ to @42,}@) ensure that it can accomodate the result of the addition.
|
||||
|
||||
Similar to the implementation of mp\_copy this function uses the braced code and local aliases coding style. The three aliases that are on
|
||||
lines @56,tmpa@, @59,tmpb@ and @62,tmpc@ represent the two inputs and destination variables respectively. These aliases are used to ensure the
|
||||
compiler does not have to dereference $a$, $b$ or $c$ (respectively) to access the digits of the respective mp\_int.
|
||||
|
||||
The initial carry $u$ is cleared on line @65,u = 0@, note that $u$ is of type mp\_digit which ensures type compatibility within the
|
||||
implementation. The initial addition loop begins on line @66,for@ and ends on line @75,}@. Similarly the conditional addition loop
|
||||
begins on line @81,for@ and ends on line @90,}@. The addition is finished with the final carry being stored in $tmpc$ on line @94,tmpc++@.
|
||||
Note the ``++'' operator on the same line. After line @94,tmpc++@ $tmpc$ will point to the $c.used$'th digit of the mp\_int $c$. This is useful
|
||||
for the next loop on lines @97,for@ to @99,}@ which set any old upper digits to zero.
|
||||
The initial carry $u$ will be cleared (line @65,u = 0@), note that $u$ is of type mp\_digit which ensures type
|
||||
compatibility within the implementation. The initial addition (line @66,for@ to @75,}@) adds digits from
|
||||
both inputs until the smallest input runs out of digits. Similarly the conditional addition loop
|
||||
(line @81,for@ to @90,}@) adds the remaining digits from the larger of the two inputs. The addition is finished
|
||||
with the final carry being stored in $tmpc$ (line @94,tmpc++@). Note the ``++'' operator within the same expression.
|
||||
After line @94,tmpc++@, $tmpc$ will point to the $c.used$'th digit of the mp\_int $c$. This is useful
|
||||
for the next loop (line @97,for@ to @99,}@) which set any old upper digits to zero.
|
||||
|
||||
\subsection{Low Level Subtraction}
|
||||
The low level unsigned subtraction algorithm is very similar to the low level unsigned addition algorithm. The principle difference is that the
|
||||
@@ -1692,7 +1672,7 @@ this algorithm we will assume that the variable $\gamma$ represents the number o
|
||||
mp\_digit (\textit{this implies $2^{\gamma} > \beta$}).
|
||||
|
||||
For example, the default for LibTomMath is to use a ``unsigned long'' for the mp\_digit ``type'' while $\beta = 2^{28}$. In ISO C an ``unsigned long''
|
||||
data type must be able to represent $0 \le x < 2^{32}$ meaning that in this case $\gamma = 32$.
|
||||
data type must be able to represent $0 \le x < 2^{32}$ meaning that in this case $\gamma \ge 32$.
|
||||
|
||||
\newpage\begin{figure}[!here]
|
||||
\begin{center}
|
||||
@@ -1759,20 +1739,23 @@ If $b$ has a smaller magnitude than $a$ then step 9 will force the carry and cop
|
||||
|
||||
EXAM,bn_s_mp_sub.c
|
||||
|
||||
Line @24,min@ and @25,max@ perform the initial hardcoded sorting of the inputs. In reality the $min$ and $max$ variables are only aliases and are only
|
||||
used to make the source code easier to read. Again the pointer alias optimization is used within this algorithm. Lines @42,tmpa@, @43,tmpb@ and @44,tmpc@ initialize the aliases for
|
||||
$a$, $b$ and $c$ respectively.
|
||||
Like low level addition we ``sort'' the inputs. Except in this case the sorting is hardcoded
|
||||
(lines @24,min@ and @25,max@). In reality the $min$ and $max$ variables are only aliases and are only
|
||||
used to make the source code easier to read. Again the pointer alias optimization is used
|
||||
within this algorithm. The aliases $tmpa$, $tmpb$ and $tmpc$ are initialized
|
||||
(lines @42,tmpa@, @43,tmpb@ and @44,tmpc@) for $a$, $b$ and $c$ respectively.
|
||||
|
||||
The first subtraction loop occurs on lines @47,u = 0@ through @61,}@. The theory behind the subtraction loop is exactly the same as that for
|
||||
the addition loop. As remarked earlier there is an implementation reason for using the ``awkward'' method of extracting the carry
|
||||
(\textit{see line @57, >>@}). The traditional method for extracting the carry would be to shift by $lg(\beta)$ positions and logically AND
|
||||
the least significant bit. The AND operation is required because all of the bits above the $\lg(\beta)$'th bit will be set to one after a carry
|
||||
occurs from subtraction. This carry extraction requires two relatively cheap operations to extract the carry. The other method is to simply
|
||||
shift the most significant bit to the least significant bit thus extracting the carry with a single cheap operation. This optimization only works on
|
||||
twos compliment machines which is a safe assumption to make.
|
||||
The first subtraction loop (lines @47,u = 0@ through @61,}@) subtract digits from both inputs until the smaller of
|
||||
the two inputs has been exhausted. As remarked earlier there is an implementation reason for using the ``awkward''
|
||||
method of extracting the carry (line @57, >>@). The traditional method for extracting the carry would be to shift
|
||||
by $lg(\beta)$ positions and logically AND the least significant bit. The AND operation is required because all of
|
||||
the bits above the $\lg(\beta)$'th bit will be set to one after a carry occurs from subtraction. This carry
|
||||
extraction requires two relatively cheap operations to extract the carry. The other method is to simply shift the
|
||||
most significant bit to the least significant bit thus extracting the carry with a single cheap operation. This
|
||||
optimization only works on twos compliment machines which is a safe assumption to make.
|
||||
|
||||
If $a$ has a larger magnitude than $b$ an additional loop (\textit{see lines @64,for@ through @73,}@}) is required to propagate the carry through
|
||||
$a$ and copy the result to $c$.
|
||||
If $a$ has a larger magnitude than $b$ an additional loop (lines @64,for@ through @73,}@) is required to propagate
|
||||
the carry through $a$ and copy the result to $c$.
|
||||
|
||||
\subsection{High Level Addition}
|
||||
Now that both lower level addition and subtraction algorithms have been established an effective high level signed addition algorithm can be
|
||||
@@ -2098,10 +2081,11 @@ FIGU,sliding_window,Sliding Window Movement
|
||||
|
||||
EXAM,bn_mp_lshd.c
|
||||
|
||||
The if statement on line @24,if@ ensures that the $b$ variable is greater than zero. The \textbf{used} count is incremented by $b$ before
|
||||
the copy loop begins. This elminates the need for an additional variable in the for loop. The variable $top$ on line @42,top@ is an alias
|
||||
for the leading digit while $bottom$ on line @45,bottom@ is an alias for the trailing edge. The aliases form a window of exactly $b$ digits
|
||||
over the input.
|
||||
The if statement (line @24,if@) ensures that the $b$ variable is greater than zero since we do not interpret negative
|
||||
shift counts properly. The \textbf{used} count is incremented by $b$ before the copy loop begins. This elminates
|
||||
the need for an additional variable in the for loop. The variable $top$ (line @42,top@) is an alias
|
||||
for the leading digit while $bottom$ (line @45,bottom@) is an alias for the trailing edge. The aliases form a
|
||||
window of exactly $b$ digits over the input.
|
||||
|
||||
\subsection{Division by $x$}
|
||||
|
||||
@@ -2151,9 +2135,9 @@ Once the window copy is complete the upper digits must be zeroed and the \textbf
|
||||
|
||||
EXAM,bn_mp_rshd.c
|
||||
|
||||
The only noteworthy element of this routine is the lack of a return type.
|
||||
|
||||
-- Will update later to give it a return type...Tom
|
||||
The only noteworthy element of this routine is the lack of a return type since it cannot fail. Like mp\_lshd() we
|
||||
form a sliding window except we copy in the other direction. After the window (line @59,for (;@) we then zero
|
||||
the upper digits of the input to make sure the result is correct.
|
||||
|
||||
\section{Powers of Two}
|
||||
|
||||
@@ -2214,7 +2198,15 @@ complete. It is possible to optimize this algorithm down to a $O(n)$ algorithm
|
||||
|
||||
EXAM,bn_mp_mul_2d.c
|
||||
|
||||
Notes to be revised when code is updated. -- Tom
|
||||
The shifting is performed in--place which means the first step (line @24,a != c@) is to copy the input to the
|
||||
destination. We avoid calling mp\_copy() by making sure the mp\_ints are different. The destination then
|
||||
has to be grown (line @31,grow@) to accomodate the result.
|
||||
|
||||
If the shift count $b$ is larger than $lg(\beta)$ then a call to mp\_lshd() is used to handle all of the multiples
|
||||
of $lg(\beta)$. Leaving only a remaining shift of $lg(\beta) - 1$ or fewer bits left. Inside the actual shift
|
||||
loop (lines @45,if@ to @76,}@) we make use of pre--computed values $shift$ and $mask$. These are used to
|
||||
extract the carry bit(s) to pass into the next iteration of the loop. The $r$ and $rr$ variables form a
|
||||
chain between consecutive iterations to propagate the carry.
|
||||
|
||||
\subsection{Division by Power of Two}
|
||||
|
||||
@@ -2263,7 +2255,8 @@ ignored by passing \textbf{NULL} as the pointer to the mp\_int variable. The
|
||||
result of the remainder operation until the end. This allows $d$ and $a$ to represent the same mp\_int without modifying $a$ before
|
||||
the quotient is obtained.
|
||||
|
||||
The remainder of the source code is essentially the same as the source code for mp\_mul\_2d. (-- Fix this paragraph up later, Tom).
|
||||
The remainder of the source code is essentially the same as the source code for mp\_mul\_2d. The only significant difference is
|
||||
the direction of the shifts.
|
||||
|
||||
\subsection{Remainder of Division by Power of Two}
|
||||
|
||||
@@ -2306,7 +2299,13 @@ is copied to $b$, leading digits are removed and the remaining leading digit is
|
||||
|
||||
EXAM,bn_mp_mod_2d.c
|
||||
|
||||
-- Add comments later, Tom.
|
||||
We first avoid cases of $b \le 0$ by simply mp\_zero()'ing the destination in such cases. Next if $2^b$ is larger
|
||||
than the input we just mp\_copy() the input and return right away. After this point we know we must actually
|
||||
perform some work to produce the remainder.
|
||||
|
||||
Recalling that reducing modulo $2^k$ and a binary ``and'' with $2^k - 1$ are numerically equivalent we can quickly reduce
|
||||
the number. First we zero any digits above the last digit in $2^b$ (line @41,for@). Next we reduce the
|
||||
leading digit of both (line @45,&=@) and then mp\_clamp().
|
||||
|
||||
\section*{Exercises}
|
||||
\begin{tabular}{cl}
|
||||
@@ -2464,33 +2463,46 @@ exceed the precision requested.
|
||||
|
||||
EXAM,bn_s_mp_mul_digs.c
|
||||
|
||||
Lines @31,if@ to @35,}@ determine if the Comba method can be used first. The conditions for using the Comba routine are that min$(a.used, b.used) < \delta$ and
|
||||
the number of digits of output is less than \textbf{MP\_WARRAY}. This new constant is used to control
|
||||
the stack usage in the Comba routines. By default it is set to $\delta$ but can be reduced when memory is at a premium.
|
||||
First we determine (line @30,if@) if the Comba method can be used first since it's faster. The conditions for
|
||||
sing the Comba routine are that min$(a.used, b.used) < \delta$ and the number of digits of output is less than
|
||||
\textbf{MP\_WARRAY}. This new constant is used to control the stack usage in the Comba routines. By default it is
|
||||
set to $\delta$ but can be reduced when memory is at a premium.
|
||||
|
||||
Of particular importance is the calculation of the $ix+iy$'th column on lines @64,mp_word@, @65,mp_word@ and @66,mp_word@. Note how all of the
|
||||
variables are cast to the type \textbf{mp\_word}, which is also the type of variable $\hat r$. That is to ensure that double precision operations
|
||||
are used instead of single precision. The multiplication on line @65,) * (@ makes use of a specific GCC optimizer behaviour. On the outset it looks like
|
||||
the compiler will have to use a double precision multiplication to produce the result required. Such an operation would be horribly slow on most
|
||||
processors and drag this to a crawl. However, GCC is smart enough to realize that double wide output single precision multipliers can be used. For
|
||||
example, the instruction ``MUL'' on the x86 processor can multiply two 32-bit values and produce a 64-bit result.
|
||||
If we cannot use the Comba method we proceed to setup the baseline routine. We allocate the the destination mp\_int
|
||||
$t$ (line @36,init@) to the exact size of the output to avoid further re--allocations. At this point we now
|
||||
begin the $O(n^2)$ loop.
|
||||
|
||||
This implementation of multiplication has the caveat that it can be trimmed to only produce a variable number of
|
||||
digits as output. In each iteration of the outer loop the $pb$ variable is set (line @48,MIN@) to the maximum
|
||||
number of inner loop iterations.
|
||||
|
||||
Inside the inner loop we calculate $\hat r$ as the mp\_word product of the two mp\_digits and the addition of the
|
||||
carry from the previous iteration. A particularly important observation is that most modern optimizing
|
||||
C compilers (GCC for instance) can recognize that a $N \times N \rightarrow 2N$ multiplication is all that
|
||||
is required for the product. In x86 terms for example, this means using the MUL instruction.
|
||||
|
||||
Each digit of the product is stored in turn (line @68,tmpt@) and the carry propagated (line @71,>>@) to the
|
||||
next iteration.
|
||||
|
||||
\subsection{Faster Multiplication by the ``Comba'' Method}
|
||||
MARK,COMBA
|
||||
|
||||
One of the huge drawbacks of the ``baseline'' algorithms is that at the $O(n^2)$ level the carry must be computed and propagated upwards. This
|
||||
makes the nested loop very sequential and hard to unroll and implement in parallel. The ``Comba'' \cite{COMBA} method is named after little known
|
||||
(\textit{in cryptographic venues}) Paul G. Comba who described a method of implementing fast multipliers that do not require nested
|
||||
carry fixup operations. As an interesting aside it seems that Paul Barrett describes a similar technique in
|
||||
his 1986 paper \cite{BARRETT} written five years before.
|
||||
One of the huge drawbacks of the ``baseline'' algorithms is that at the $O(n^2)$ level the carry must be
|
||||
computed and propagated upwards. This makes the nested loop very sequential and hard to unroll and implement
|
||||
in parallel. The ``Comba'' \cite{COMBA} method is named after little known (\textit{in cryptographic venues}) Paul G.
|
||||
Comba who described a method of implementing fast multipliers that do not require nested carry fixup operations. As an
|
||||
interesting aside it seems that Paul Barrett describes a similar technique in his 1986 paper \cite{BARRETT} written
|
||||
five years before.
|
||||
|
||||
At the heart of the Comba technique is once again the long-hand algorithm. Except in this case a slight twist is placed on how
|
||||
the columns of the result are produced. In the standard long-hand algorithm rows of products are produced then added together to form the
|
||||
final result. In the baseline algorithm the columns are added together after each iteration to get the result instantaneously.
|
||||
At the heart of the Comba technique is once again the long-hand algorithm. Except in this case a slight
|
||||
twist is placed on how the columns of the result are produced. In the standard long-hand algorithm rows of products
|
||||
are produced then added together to form the final result. In the baseline algorithm the columns are added together
|
||||
after each iteration to get the result instantaneously.
|
||||
|
||||
In the Comba algorithm the columns of the result are produced entirely independently of each other. That is at the $O(n^2)$ level a
|
||||
simple multiplication and addition step is performed. The carries of the columns are propagated after the nested loop to reduce the amount
|
||||
of work requiored. Succintly the first step of the algorithm is to compute the product vector $\vec x$ as follows.
|
||||
In the Comba algorithm the columns of the result are produced entirely independently of each other. That is at
|
||||
the $O(n^2)$ level a simple multiplication and addition step is performed. The carries of the columns are propagated
|
||||
after the nested loop to reduce the amount of work requiored. Succintly the first step of the algorithm is to compute
|
||||
the product vector $\vec x$ as follows.
|
||||
|
||||
\begin{equation}
|
||||
\vec x_n = \sum_{i+j = n} a_ib_j, \forall n \in \lbrace 0, 1, 2, \ldots, i + j \rbrace
|
||||
@@ -2584,38 +2596,32 @@ $256$ digits would allow for numbers in the range of $0 \le x < 2^{7168}$ which,
|
||||
\textbf{Input}. mp\_int $a$, mp\_int $b$ and an integer $digs$ \\
|
||||
\textbf{Output}. $c \leftarrow \vert a \vert \cdot \vert b \vert \mbox{ (mod }\beta^{digs}\mbox{)}$. \\
|
||||
\hline \\
|
||||
Place an array of \textbf{MP\_WARRAY} double precision digits named $\hat W$ on the stack. \\
|
||||
Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the stack. \\
|
||||
1. If $c.alloc < digs$ then grow $c$ to $digs$ digits. (\textit{mp\_grow}) \\
|
||||
2. If step 1 failed return(\textit{MP\_MEM}).\\
|
||||
\\
|
||||
Zero the temporary array $\hat W$. \\
|
||||
3. for $n$ from $0$ to $digs - 1$ do \\
|
||||
\hspace{3mm}3.1 $\hat W_n \leftarrow 0$ \\
|
||||
3. $pa \leftarrow \mbox{MIN}(digs, a.used + b.used)$ \\
|
||||
\\
|
||||
Compute the columns. \\
|
||||
4. for $ix$ from $0$ to $a.used - 1$ do \\
|
||||
\hspace{3mm}4.1 $pb \leftarrow \mbox{min}(b.used, digs - ix)$ \\
|
||||
\hspace{3mm}4.2 If $pb < 1$ then goto step 5. \\
|
||||
\hspace{3mm}4.3 for $iy$ from $0$ to $pb - 1$ do \\
|
||||
\hspace{6mm}4.3.1 $\hat W_{ix+iy} \leftarrow \hat W_{ix+iy} + a_{ix}b_{iy}$ \\
|
||||
4. $\_ \hat W \leftarrow 0$ \\
|
||||
5. for $ix$ from 0 to $pa - 1$ do \\
|
||||
\hspace{3mm}5.1 $ty \leftarrow \mbox{MIN}(b.used - 1, ix)$ \\
|
||||
\hspace{3mm}5.2 $tx \leftarrow ix - ty$ \\
|
||||
\hspace{3mm}5.3 $iy \leftarrow \mbox{MIN}(a.used - tx, ty + 1)$ \\
|
||||
\hspace{3mm}5.4 for $iz$ from 0 to $iy - 1$ do \\
|
||||
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
|
||||
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
|
||||
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
|
||||
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
|
||||
\\
|
||||
Propagate the carries upwards. \\
|
||||
5. $oldused \leftarrow c.used$ \\
|
||||
6. $c.used \leftarrow digs$ \\
|
||||
7. If $digs > 1$ then do \\
|
||||
\hspace{3mm}7.1. for $ix$ from $1$ to $digs - 1$ do \\
|
||||
\hspace{6mm}7.1.1 $\hat W_{ix} \leftarrow \hat W_{ix} + \lfloor \hat W_{ix-1} / \beta \rfloor$ \\
|
||||
\hspace{6mm}7.1.2 $c_{ix - 1} \leftarrow \hat W_{ix - 1} \mbox{ (mod }\beta\mbox{)}$ \\
|
||||
8. else do \\
|
||||
\hspace{3mm}8.1 $ix \leftarrow 0$ \\
|
||||
9. $c_{ix} \leftarrow \hat W_{ix} \mbox{ (mod }\beta\mbox{)}$ \\
|
||||
7. $oldused \leftarrow c.used$ \\
|
||||
8. $c.used \leftarrow digs$ \\
|
||||
9. for $ix$ from $0$ to $pa$ do \\
|
||||
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
|
||||
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
|
||||
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
|
||||
\\
|
||||
Zero excess digits. \\
|
||||
10. If $digs < oldused$ then do \\
|
||||
\hspace{3mm}10.1 for $n$ from $digs$ to $oldused - 1$ do \\
|
||||
\hspace{6mm}10.1.1 $c_n \leftarrow 0$ \\
|
||||
11. Clamp excessive digits of $c$. (\textit{mp\_clamp}) \\
|
||||
12. Return(\textit{MP\_OKAY}). \\
|
||||
11. Clamp $c$. \\
|
||||
12. Return MP\_OKAY. \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
@@ -2625,15 +2631,24 @@ Zero excess digits. \\
|
||||
\end{figure}
|
||||
|
||||
\textbf{Algorithm fast\_s\_mp\_mul\_digs.}
|
||||
This algorithm performs the unsigned multiplication of $a$ and $b$ using the Comba method limited to $digs$ digits of precision. The algorithm
|
||||
essentially peforms the same calculation as algorithm s\_mp\_mul\_digs, just much faster.
|
||||
This algorithm performs the unsigned multiplication of $a$ and $b$ using the Comba method limited to $digs$ digits of precision.
|
||||
|
||||
The array $\hat W$ is meant to be on the stack when the algorithm is used. The size of the array does not change which is ideal. Note also that
|
||||
unlike algorithm s\_mp\_mul\_digs no temporary mp\_int is required since the result is calculated directly in $\hat W$.
|
||||
The outer loop of this algorithm is more complicated than that of the baseline multiplier. This is because on the inside of the
|
||||
loop we want to produce one column per pass. This allows the accumulator $\_ \hat W$ to be placed in CPU registers and
|
||||
reduce the memory bandwidth to two \textbf{mp\_digit} reads per iteration.
|
||||
|
||||
The $O(n^2)$ loop on step four is where the Comba method's advantages begin to show through in comparison to the baseline algorithm. The lack of
|
||||
a carry variable or propagation in this loop allows the loop to be performed with only single precision multiplication and additions. Now that each
|
||||
iteration of the inner loop can be performed independent of the others the inner loop can be performed with a high level of parallelism.
|
||||
The $ty$ variable is set to the minimum count of $ix$ or the number of digits in $b$. That way if $a$ has more digits than
|
||||
$b$ this will be limited to $b.used - 1$. The $tx$ variable is set to the to the distance past $b.used$ the variable
|
||||
$ix$ is. This is used for the immediately subsequent statement where we find $iy$.
|
||||
|
||||
The variable $iy$ is the minimum digits we can read from either $a$ or $b$ before running out. Computing one column at a time
|
||||
means we have to scan one integer upwards and the other downwards. $a$ starts at $tx$ and $b$ starts at $ty$. In each
|
||||
pass we are producing the $ix$'th output column and we note that $tx + ty = ix$. As we move $tx$ upwards we have to
|
||||
move $ty$ downards so the equality remains valid. The $iy$ variable is the number of iterations until
|
||||
$tx \ge a.used$ or $ty < 0$ occurs.
|
||||
|
||||
After every inner pass we store the lower half of the accumulator into $W_{ix}$ and then propagate the carry of the accumulator
|
||||
into the next round by dividing $\_ \hat W$ by $\beta$.
|
||||
|
||||
To measure the benefits of the Comba method over the baseline method consider the number of operations that are required. If the
|
||||
cost in terms of time of a multiply and addition is $p$ and the cost of a carry propagation is $q$ then a baseline multiplication would require
|
||||
@@ -2643,20 +2658,20 @@ and addition operations in the nested loop in parallel.
|
||||
|
||||
EXAM,bn_fast_s_mp_mul_digs.c
|
||||
|
||||
The memset on line @47,memset@ clears the initial $\hat W$ array to zero in a single step. Like the slower baseline multiplication
|
||||
implementation a series of aliases (\textit{lines @67, tmpx@, @70, tmpy@ and @75,_W@}) are used to simplify the inner $O(n^2)$ loop.
|
||||
In this case a new alias $\_\hat W$ has been added which refers to the double precision columns offset by $ix$ in each pass.
|
||||
As per the pseudo--code we first calculate $pa$ (line @47,MIN@) as the number of digits to output. Next we begin the outer loop
|
||||
to produce the individual columns of the product. We use the two aliases $tmpx$ and $tmpy$ (lines @61,tmpx@, @62,tmpy@) to point
|
||||
inside the two multiplicands quickly.
|
||||
|
||||
The inner loop on lines @83,for@, @84,mp_word@ and @85,}@ is where the algorithm will spend the majority of the time, which is why it has been
|
||||
stripped to the bones of any extra baggage\footnote{Hence the pointer aliases.}. On x86 processors the multiplication and additions amount to at the
|
||||
very least five instructions (\textit{two loads, two additions, one multiply}) while on the ARMv4 processors they amount to only three
|
||||
(\textit{one load, one store, one multiply-add}). For both of the x86 and ARMv4 processors the GCC compiler performs a good job at unrolling the loop
|
||||
and scheduling the instructions so there are very few dependency stalls.
|
||||
The inner loop (lines @70,for@ to @72,}@) of this implementation is where the tradeoff come into play. Originally this comba
|
||||
implementation was ``row--major'' which means it adds to each of the columns in each pass. After the outer loop it would then fix
|
||||
the carries. This was very fast except it had an annoying drawback. You had to read a mp\_word and two mp\_digits and write
|
||||
one mp\_word per iteration. On processors such as the Athlon XP and P4 this did not matter much since the cache bandwidth
|
||||
is very high and it can keep the ALU fed with data. It did, however, matter on older and embedded cpus where cache is often
|
||||
slower and also often doesn't exist. This new algorithm only performs two reads per iteration under the assumption that the
|
||||
compiler has aliased $\_ \hat W$ to a CPU register.
|
||||
|
||||
In theory the difference between the baseline and comba algorithms is a mere $O(qn)$ time difference. However, in the $O(n^2)$ nested loop of the
|
||||
baseline method there are dependency stalls as the algorithm must wait for the multiplier to finish before propagating the carry to the next
|
||||
digit. As a result fewer of the often multiple execution units\footnote{The AMD Athlon has three execution units and the Intel P4 has four.} can
|
||||
be simultaneously used.
|
||||
After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines @75,W[ix]@, @78,>>@) to forward it as
|
||||
a carry for the next pass. After the outer loop we use the final carry (line @82,W[ix]@) as the last digit of the product.
|
||||
|
||||
\subsection{Polynomial Basis Multiplication}
|
||||
To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms
|
||||
@@ -2976,13 +2991,26 @@ result $a \cdot b$ is produced.
|
||||
|
||||
EXAM,bn_mp_toom_mul.c
|
||||
|
||||
-- Comments to be added during editing phase.
|
||||
The first obvious thing to note is that this algorithm is complicated. The complexity is worth it if you are multiplying very
|
||||
large numbers. For example, a 10,000 digit multiplication takes approximaly 99,282,205 fewer single precision multiplications with
|
||||
Toom--Cook than a Comba or baseline approach (this is a savings of more than 99$\%$). For most ``crypto'' sized numbers this
|
||||
algorithm is not practical as Karatsuba has a much lower cutoff point.
|
||||
|
||||
First we split $a$ and $b$ into three roughly equal portions. This has been accomplished (lines @40,mod@ to @69,rshd@) with
|
||||
combinations of mp\_rshd() and mp\_mod\_2d() function calls. At this point $a = a2 \cdot \beta^2 + a1 \cdot \beta + a0$ and similiarly
|
||||
for $b$.
|
||||
|
||||
Next we compute the five points $w0, w1, w2, w3$ and $w4$. Recall that $w0$ and $w4$ can be computed directly from the portions so
|
||||
we get those out of the way first (lines @72,mul@ and @77,mul@). Next we compute $w1, w2$ and $w3$ using Horners method.
|
||||
|
||||
After this point we solve for the actual values of $w1, w2$ and $w3$ by reducing the $5 \times 5$ system which is relatively
|
||||
straight forward.
|
||||
|
||||
\subsection{Signed Multiplication}
|
||||
Now that algorithms to handle multiplications of every useful dimensions have been developed, a rather simple finishing touch is required. So far all
|
||||
of the multiplication algorithms have been unsigned multiplications which leaves only a signed multiplication algorithm to be established.
|
||||
|
||||
\newpage\begin{figure}[!here]
|
||||
\begin{figure}[!here]
|
||||
\begin{small}
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
@@ -3065,7 +3093,7 @@ Column two of row one is a square and column three is the first unique column.
|
||||
The baseline squaring algorithm is meant to be a catch-all squaring algorithm. It will handle any of the input sizes that the faster routines
|
||||
will not handle.
|
||||
|
||||
\newpage\begin{figure}[!here]
|
||||
\begin{figure}[!here]
|
||||
\begin{small}
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
@@ -3121,9 +3149,14 @@ results calculated so far. This involves expensive carry propagation which will
|
||||
|
||||
EXAM,bn_s_mp_sqr.c
|
||||
|
||||
Inside the outer loop (\textit{see line @32,for@}) the square term is calculated on line @35,r =@. Line @42,>>@ extracts the carry from the square
|
||||
term. Aliases for $a_{ix}$ and $t_{ix+iy}$ are initialized on lines @45,tmpx@ and @48,tmpt@ respectively. The doubling is performed using two
|
||||
additions (\textit{see line @57,r + r@}) since it is usually faster than shifting,if not at least as fast.
|
||||
Inside the outer loop (line @32,for@) the square term is calculated on line @35,r =@. The carry (line @42,>>@) has been
|
||||
extracted from the mp\_word accumulator using a right shift. Aliases for $a_{ix}$ and $t_{ix+iy}$ are initialized
|
||||
(lines @45,tmpx@ and @48,tmpt@) to simplify the inner loop. The doubling is performed using two
|
||||
additions (line @57,r + r@) since it is usually faster than shifting, if not at least as fast.
|
||||
|
||||
The important observation is that the inner loop does not begin at $iy = 0$ like for multiplication. As such the inner loops
|
||||
get progressively shorter as the algorithm proceeds. This is what leads to the savings compared to using a multiplication to
|
||||
square a number.
|
||||
|
||||
\subsection{Faster Squaring by the ``Comba'' Method}
|
||||
A major drawback to the baseline method is the requirement for single precision shifting inside the $O(n^2)$ nested loop. Squaring has an additional
|
||||
@@ -3135,9 +3168,9 @@ propagation operations from the inner loop. However, the inner product must sti
|
||||
that $2a + 2b + 2c = 2(a + b + c)$. That is the sum of all of the double products is equal to double the sum of all the products. For example,
|
||||
$ab + ba + ac + ca = 2ab + 2ac = 2(ab + ac)$.
|
||||
|
||||
However, we cannot simply double all of the columns, since the squares appear only once per row. The most practical solution is to have two mp\_word
|
||||
arrays. One array will hold the squares and the other array will hold the double products. With both arrays the doubling and carry propagation can be
|
||||
moved to a $O(n)$ work level outside the $O(n^2)$ level.
|
||||
However, we cannot simply double all of the columns, since the squares appear only once per row. The most practical solution is to have two
|
||||
mp\_word arrays. One array will hold the squares and the other array will hold the double products. With both arrays the doubling and
|
||||
carry propagation can be moved to a $O(n)$ work level outside the $O(n^2)$ level. In this case, we have an even simpler solution in mind.
|
||||
|
||||
\newpage\begin{figure}[!here]
|
||||
\begin{small}
|
||||
@@ -3147,34 +3180,34 @@ moved to a $O(n)$ work level outside the $O(n^2)$ level.
|
||||
\textbf{Input}. mp\_int $a$ \\
|
||||
\textbf{Output}. $b \leftarrow a^2$ \\
|
||||
\hline \\
|
||||
Place two arrays of \textbf{MP\_WARRAY} mp\_words named $\hat W$ and $\hat {X}$ on the stack. \\
|
||||
Place an array of \textbf{MP\_WARRAY} mp\_digits named $W$ on the stack. \\
|
||||
1. If $b.alloc < 2a.used + 1$ then grow $b$ to $2a.used + 1$ digits. (\textit{mp\_grow}). \\
|
||||
2. If step 1 failed return(\textit{MP\_MEM}). \\
|
||||
3. for $ix$ from $0$ to $2a.used + 1$ do \\
|
||||
\hspace{3mm}3.1 $\hat W_{ix} \leftarrow 0$ \\
|
||||
\hspace{3mm}3.2 $\hat {X}_{ix} \leftarrow 0$ \\
|
||||
4. for $ix$ from $0$ to $a.used - 1$ do \\
|
||||
\hspace{3mm}Compute the square.\\
|
||||
\hspace{3mm}4.1 $\hat {X}_{ix+ix} \leftarrow \left ( a_{ix} \right )^2$ \\
|
||||
\\
|
||||
\hspace{3mm}Compute the double products.\\
|
||||
\hspace{3mm}4.2 for $iy$ from $ix + 1$ to $a.used - 1$ do \\
|
||||
\hspace{6mm}4.2.1 $\hat W_{ix+iy} \leftarrow \hat W_{ix+iy} + a_{ix}a_{iy}$ \\
|
||||
5. $oldused \leftarrow b.used$ \\
|
||||
6. $b.used \leftarrow 2a.used + 1$ \\
|
||||
3. $pa \leftarrow 2 \cdot a.used$ \\
|
||||
4. $\hat W1 \leftarrow 0$ \\
|
||||
5. for $ix$ from $0$ to $pa - 1$ do \\
|
||||
\hspace{3mm}5.1 $\_ \hat W \leftarrow 0$ \\
|
||||
\hspace{3mm}5.2 $ty \leftarrow \mbox{MIN}(a.used - 1, ix)$ \\
|
||||
\hspace{3mm}5.3 $tx \leftarrow ix - ty$ \\
|
||||
\hspace{3mm}5.4 $iy \leftarrow \mbox{MIN}(a.used - tx, ty + 1)$ \\
|
||||
\hspace{3mm}5.5 $iy \leftarrow \mbox{MIN}(iy, \lfloor \left (ty - tx + 1 \right )/2 \rfloor)$ \\
|
||||
\hspace{3mm}5.6 for $iz$ from $0$ to $iz - 1$ do \\
|
||||
\hspace{6mm}5.6.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx + iz}a_{ty - iz}$ \\
|
||||
\hspace{3mm}5.7 $\_ \hat W \leftarrow 2 \cdot \_ \hat W + \hat W1$ \\
|
||||
\hspace{3mm}5.8 if $ix$ is even then \\
|
||||
\hspace{6mm}5.8.1 $\_ \hat W \leftarrow \_ \hat W + \left ( a_{\lfloor ix/2 \rfloor}\right )^2$ \\
|
||||
\hspace{3mm}5.9 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
|
||||
\hspace{3mm}5.10 $\hat W1 \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
|
||||
\\
|
||||
Double the products and propagate the carries simultaneously. \\
|
||||
7. $\hat W_0 \leftarrow 2 \hat W_0 + \hat {X}_0$ \\
|
||||
8. for $ix$ from $1$ to $2a.used$ do \\
|
||||
\hspace{3mm}8.1 $\hat W_{ix} \leftarrow 2 \hat W_{ix} + \hat {X}_{ix}$ \\
|
||||
\hspace{3mm}8.2 $\hat W_{ix} \leftarrow \hat W_{ix} + \lfloor \hat W_{ix - 1} / \beta \rfloor$ \\
|
||||
\hspace{3mm}8.3 $b_{ix-1} \leftarrow W_{ix-1} \mbox{ (mod }\beta\mbox{)}$ \\
|
||||
9. $b_{2a.used} \leftarrow \hat W_{2a.used} \mbox{ (mod }\beta\mbox{)}$ \\
|
||||
10. if $2a.used + 1 < oldused$ then do \\
|
||||
\hspace{3mm}10.1 for $ix$ from $2a.used + 1$ to $oldused$ do \\
|
||||
\hspace{6mm}10.1.1 $b_{ix} \leftarrow 0$ \\
|
||||
11. Clamp excess digits from $b$. (\textit{mp\_clamp}) \\
|
||||
12. Return(\textit{MP\_OKAY}). \\
|
||||
6. $oldused \leftarrow b.used$ \\
|
||||
7. $b.used \leftarrow 2 \cdot a.used$ \\
|
||||
8. for $ix$ from $0$ to $pa - 1$ do \\
|
||||
\hspace{3mm}8.1 $b_{ix} \leftarrow W_{ix}$ \\
|
||||
9. for $ix$ from $pa$ to $oldused - 1$ do \\
|
||||
\hspace{3mm}9.1 $b_{ix} \leftarrow 0$ \\
|
||||
10. Clamp excess digits from $b$. (\textit{mp\_clamp}) \\
|
||||
11. Return(\textit{MP\_OKAY}). \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
@@ -3183,24 +3216,24 @@ Double the products and propagate the carries simultaneously. \\
|
||||
\end{figure}
|
||||
|
||||
\textbf{Algorithm fast\_s\_mp\_sqr.}
|
||||
This algorithm computes the square of an input using the Comba technique. It is designed to be a replacement for algorithm s\_mp\_sqr when
|
||||
the number of input digits is less than \textbf{MP\_WARRAY} and less than $\delta \over 2$.
|
||||
This algorithm computes the square of an input using the Comba technique. It is designed to be a replacement for algorithm
|
||||
s\_mp\_sqr when the number of input digits is less than \textbf{MP\_WARRAY} and less than $\delta \over 2$.
|
||||
This algorithm is very similar to the Comba multiplier except with a few key differences we shall make note of.
|
||||
|
||||
This routine requires two arrays of mp\_words to be placed on the stack. The first array $\hat W$ will hold the double products and the second
|
||||
array $\hat X$ will hold the squares. Though only at most $MP\_WARRAY \over 2$ words of $\hat X$ are used, it has proven faster on most
|
||||
processors to simply make it a full size array.
|
||||
First, we have an accumulator and carry variables $\_ \hat W$ and $\hat W1$ respectively. This is because the inner loop
|
||||
products are to be doubled. If we had added the previous carry in we would be doubling too much. Next we perform an
|
||||
addition MIN condition on $iy$ (step 5.5) to prevent overlapping digits. For example, $a_3 \cdot a_5$ is equal
|
||||
$a_5 \cdot a_3$. Whereas in the multiplication case we would have $5 < a.used$ and $3 \ge 0$ is maintained since we double the sum
|
||||
of the products just outside the inner loop we have to avoid doing this. This is also a good thing since we perform
|
||||
fewer multiplications and the routine ends up being faster.
|
||||
|
||||
The loop on step 3 will zero the two arrays to prepare them for the squaring step. Step 4.1 computes the squares of the product. Note how
|
||||
it simply assigns the value into the $\hat X$ array. The nested loop on step 4.2 computes the doubles of the products. This loop
|
||||
computes the sum of the products for each column. They are not doubled until later.
|
||||
|
||||
After the squaring loop, the products stored in $\hat W$ musted be doubled and the carries propagated forwards. It makes sense to do both
|
||||
operations at the same time. The expression $\hat W_{ix} \leftarrow 2 \hat W_{ix} + \hat {X}_{ix}$ computes the sum of the double product and the
|
||||
squares in place.
|
||||
Finally the last difference is the addition of the ``square'' term outside the inner loop (step 5.8). We add in the square
|
||||
only to even outputs and it is the square of the term at the $\lfloor ix / 2 \rfloor$ position.
|
||||
|
||||
EXAM,bn_fast_s_mp_sqr.c
|
||||
|
||||
-- Write something deep and insightful later, Tom.
|
||||
This implementation is essentially a copy of Comba multiplication with the appropriate changes added to make it faster for
|
||||
the special case of squaring.
|
||||
|
||||
\subsection{Polynomial Basis Squaring}
|
||||
The same algorithm that performs optimal polynomial basis multiplication can be used to perform polynomial basis squaring. The minor exception
|
||||
@@ -3312,14 +3345,13 @@ By inlining the copy and shift operations the cutoff point for Karatsuba multipl
|
||||
is exactly at the point where Comba squaring can no longer be used (\textit{128 digits}). On slower processors such as the Intel P4
|
||||
it is actually below the Comba limit (\textit{at 110 digits}).
|
||||
|
||||
This routine uses the same error trap coding style as mp\_karatsuba\_sqr. As the temporary variables are initialized errors are redirected to
|
||||
the error trap higher up. If the algorithm completes without error the error code is set to \textbf{MP\_OKAY} and mp\_clears are executed normally.
|
||||
|
||||
\textit{Last paragraph sucks. re-write! -- Tom}
|
||||
This routine uses the same error trap coding style as mp\_karatsuba\_sqr. As the temporary variables are initialized errors are
|
||||
redirected to the error trap higher up. If the algorithm completes without error the error code is set to \textbf{MP\_OKAY} and
|
||||
mp\_clears are executed normally.
|
||||
|
||||
\subsection{Toom-Cook Squaring}
|
||||
The Toom-Cook squaring algorithm mp\_toom\_sqr is heavily based on the algorithm mp\_toom\_mul with the exception that squarings are used
|
||||
instead of multiplication to find the five relations.. The reader is encouraged to read the description of the latter algorithm and try to
|
||||
instead of multiplication to find the five relations. The reader is encouraged to read the description of the latter algorithm and try to
|
||||
derive their own Toom-Cook squaring algorithm.
|
||||
|
||||
\subsection{High Level Squaring}
|
||||
@@ -3362,12 +3394,9 @@ EXAM,bn_mp_sqr.c
|
||||
$\left [ 3 \right ] $ & Devise an efficient algorithm for selection of the radix point to handle inputs \\
|
||||
& that have different number of digits in Karatsuba multiplication. \\
|
||||
& \\
|
||||
$\left [ 3 \right ] $ & In ~SQUARE~ the fact that every column of a squaring is made up \\
|
||||
$\left [ 2 \right ] $ & In ~SQUARE~ the fact that every column of a squaring is made up \\
|
||||
& of double products and at most one square is stated. Prove this statement. \\
|
||||
& \\
|
||||
$\left [ 2 \right ] $ & In the Comba squaring algorithm half of the $\hat X$ variables are not used. \\
|
||||
& Revise algorithm fast\_s\_mp\_sqr to shrink the $\hat X$ array. \\
|
||||
& \\
|
||||
$\left [ 3 \right ] $ & Prove the equation for Karatsuba squaring. \\
|
||||
& \\
|
||||
$\left [ 1 \right ] $ & Prove that Karatsuba squaring requires $O \left (n^{lg(3)} \right )$ time. \\
|
||||
@@ -3375,6 +3404,14 @@ $\left [ 1 \right ] $ & Prove that Karatsuba squaring requires $O \left (n^{lg(3
|
||||
$\left [ 2 \right ] $ & Determine the minimal ratio between addition and multiplication clock cycles \\
|
||||
& required for equation $6.7$ to be true. \\
|
||||
& \\
|
||||
$\left [ 3 \right ] $ & Implement a threaded version of Comba multiplication (and squaring) where you \\
|
||||
& compute subsets of the columns in each thread. Determine a cutoff point where \\
|
||||
& it is effective and add the logic to mp\_mul() and mp\_sqr(). \\
|
||||
&\\
|
||||
$\left [ 4 \right ] $ & Same as the previous but also modify the Karatsuba and Toom-Cook. You must \\
|
||||
& increase the throughput of mp\_exptmod() for random odd moduli in the range \\
|
||||
& $512 \ldots 4096$ bits significantly ($> 2x$) to complete this challenge. \\
|
||||
& \\
|
||||
\end{tabular}
|
||||
|
||||
\chapter{Modular Reduction}
|
||||
@@ -3394,7 +3431,7 @@ other forms of residues.
|
||||
Modular reductions are normally used to create either finite groups, rings or fields. The most common usage for performance driven modular reductions
|
||||
is in modular exponentiation algorithms. That is to compute $d = a^b \mbox{ (mod }c\mbox{)}$ as fast as possible. This operation is used in the
|
||||
RSA and Diffie-Hellman public key algorithms, for example. Modular multiplication and squaring also appears as a fundamental operation in
|
||||
Elliptic Curve cryptographic algorithms. As will be discussed in the subsequent chapter there exist fast algorithms for computing modular
|
||||
elliptic curve cryptographic algorithms. As will be discussed in the subsequent chapter there exist fast algorithms for computing modular
|
||||
exponentiations without having to perform (\textit{in this example}) $b - 1$ multiplications. These algorithms will produce partial results in the
|
||||
range $0 \le x < c^2$ which can be taken advantage of to create several efficient algorithms. They have also been used to create redundancy check
|
||||
algorithms known as CRCs, error correction codes such as Reed-Solomon and solve a variety of number theoeretic problems.
|
||||
@@ -3610,7 +3647,7 @@ safe to do so.
|
||||
In order to use algorithm mp\_reduce the value of $\mu$ must be calculated in advance. Ideally this value should be computed once and stored for
|
||||
future use so that the Barrett algorithm can be used without delay.
|
||||
|
||||
\begin{figure}[!here]
|
||||
\newpage\begin{figure}[!here]
|
||||
\begin{small}
|
||||
\begin{center}
|
||||
\begin{tabular}{l}
|
||||
@@ -5818,6 +5855,8 @@ To explain the Jacobi Symbol we shall first discuss the Legendre function\footno
|
||||
defined. The Legendre function computes whether or not an integer $a$ is a quadratic residue modulo an odd prime $p$. Numerically it is
|
||||
equivalent to equation \ref{eqn:legendre}.
|
||||
|
||||
\textit{-- Tom, don't be an ass, cite your source here...!}
|
||||
|
||||
\begin{equation}
|
||||
a^{(p-1)/2} \equiv \begin{array}{rl}
|
||||
-1 & \mbox{if }a\mbox{ is a quadratic non-residue.} \\
|
||||
|
||||
@@ -90,8 +90,11 @@
|
||||
#define BN_MP_READ_UNSIGNED_BIN_C
|
||||
#define BN_MP_REDUCE_C
|
||||
#define BN_MP_REDUCE_2K_C
|
||||
#define BN_MP_REDUCE_2K_L_C
|
||||
#define BN_MP_REDUCE_2K_SETUP_C
|
||||
#define BN_MP_REDUCE_2K_SETUP_L_C
|
||||
#define BN_MP_REDUCE_IS_2K_C
|
||||
#define BN_MP_REDUCE_IS_2K_L_C
|
||||
#define BN_MP_REDUCE_SETUP_C
|
||||
#define BN_MP_RSHD_C
|
||||
#define BN_MP_SET_C
|
||||
@@ -105,7 +108,9 @@
|
||||
#define BN_MP_SUB_D_C
|
||||
#define BN_MP_SUBMOD_C
|
||||
#define BN_MP_TO_SIGNED_BIN_C
|
||||
#define BN_MP_TO_SIGNED_BIN_N_C
|
||||
#define BN_MP_TO_UNSIGNED_BIN_C
|
||||
#define BN_MP_TO_UNSIGNED_BIN_N_C
|
||||
#define BN_MP_TOOM_MUL_C
|
||||
#define BN_MP_TOOM_SQR_C
|
||||
#define BN_MP_TORADIX_C
|
||||
@@ -132,7 +137,7 @@
|
||||
#define BN_MP_ISEVEN_C
|
||||
#define BN_MP_INIT_MULTI_C
|
||||
#define BN_MP_COPY_C
|
||||
#define BN_MP_ABS_C
|
||||
#define BN_MP_MOD_C
|
||||
#define BN_MP_SET_C
|
||||
#define BN_MP_DIV_2_C
|
||||
#define BN_MP_ISODD_C
|
||||
@@ -242,6 +247,7 @@
|
||||
#define BN_MP_INIT_MULTI_C
|
||||
#define BN_MP_SET_C
|
||||
#define BN_MP_COUNT_BITS_C
|
||||
#define BN_MP_ABS_C
|
||||
#define BN_MP_MUL_2D_C
|
||||
#define BN_MP_CMP_C
|
||||
#define BN_MP_SUB_C
|
||||
@@ -323,11 +329,12 @@
|
||||
#define BN_MP_CLEAR_C
|
||||
#define BN_MP_ABS_C
|
||||
#define BN_MP_CLEAR_MULTI_C
|
||||
#define BN_MP_REDUCE_IS_2K_L_C
|
||||
#define BN_S_MP_EXPTMOD_C
|
||||
#define BN_MP_DR_IS_MODULUS_C
|
||||
#define BN_MP_REDUCE_IS_2K_C
|
||||
#define BN_MP_ISODD_C
|
||||
#define BN_MP_EXPTMOD_FAST_C
|
||||
#define BN_S_MP_EXPTMOD_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_EXPTMOD_FAST_C)
|
||||
@@ -359,6 +366,7 @@
|
||||
#define BN_MP_DIV_C
|
||||
#define BN_MP_MUL_C
|
||||
#define BN_MP_SUB_C
|
||||
#define BN_MP_NEG_C
|
||||
#define BN_MP_EXCH_C
|
||||
#define BN_MP_CLEAR_MULTI_C
|
||||
#endif
|
||||
@@ -433,6 +441,7 @@
|
||||
#if defined(BN_MP_INVMOD_SLOW_C)
|
||||
#define BN_MP_ISZERO_C
|
||||
#define BN_MP_INIT_MULTI_C
|
||||
#define BN_MP_MOD_C
|
||||
#define BN_MP_COPY_C
|
||||
#define BN_MP_ISEVEN_C
|
||||
#define BN_MP_SET_C
|
||||
@@ -724,6 +733,17 @@
|
||||
#define BN_MP_CLEAR_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_2K_L_C)
|
||||
#define BN_MP_INIT_C
|
||||
#define BN_MP_COUNT_BITS_C
|
||||
#define BN_MP_DIV_2D_C
|
||||
#define BN_MP_MUL_C
|
||||
#define BN_S_MP_ADD_C
|
||||
#define BN_MP_CMP_MAG_C
|
||||
#define BN_S_MP_SUB_C
|
||||
#define BN_MP_CLEAR_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_2K_SETUP_C)
|
||||
#define BN_MP_INIT_C
|
||||
#define BN_MP_COUNT_BITS_C
|
||||
@@ -732,11 +752,22 @@
|
||||
#define BN_S_MP_SUB_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_2K_SETUP_L_C)
|
||||
#define BN_MP_INIT_C
|
||||
#define BN_MP_2EXPT_C
|
||||
#define BN_MP_COUNT_BITS_C
|
||||
#define BN_S_MP_SUB_C
|
||||
#define BN_MP_CLEAR_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_IS_2K_C)
|
||||
#define BN_MP_REDUCE_2K_C
|
||||
#define BN_MP_COUNT_BITS_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_IS_2K_L_C)
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_REDUCE_SETUP_C)
|
||||
#define BN_MP_2EXPT_C
|
||||
#define BN_MP_DIV_C
|
||||
@@ -814,6 +845,11 @@
|
||||
#define BN_MP_TO_UNSIGNED_BIN_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_TO_SIGNED_BIN_N_C)
|
||||
#define BN_MP_SIGNED_BIN_SIZE_C
|
||||
#define BN_MP_TO_SIGNED_BIN_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_TO_UNSIGNED_BIN_C)
|
||||
#define BN_MP_INIT_COPY_C
|
||||
#define BN_MP_ISZERO_C
|
||||
@@ -821,6 +857,11 @@
|
||||
#define BN_MP_CLEAR_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_TO_UNSIGNED_BIN_N_C)
|
||||
#define BN_MP_UNSIGNED_BIN_SIZE_C
|
||||
#define BN_MP_TO_UNSIGNED_BIN_C
|
||||
#endif
|
||||
|
||||
#if defined(BN_MP_TOOM_MUL_C)
|
||||
#define BN_MP_INIT_MULTI_C
|
||||
#define BN_MP_MOD_2D_C
|
||||
@@ -901,10 +942,12 @@
|
||||
#define BN_MP_INIT_C
|
||||
#define BN_MP_CLEAR_C
|
||||
#define BN_MP_REDUCE_SETUP_C
|
||||
#define BN_MP_REDUCE_C
|
||||
#define BN_MP_REDUCE_2K_SETUP_L_C
|
||||
#define BN_MP_REDUCE_2K_L_C
|
||||
#define BN_MP_MOD_C
|
||||
#define BN_MP_COPY_C
|
||||
#define BN_MP_SQR_C
|
||||
#define BN_MP_REDUCE_C
|
||||
#define BN_MP_MUL_C
|
||||
#define BN_MP_SET_C
|
||||
#define BN_MP_EXCH_C
|
||||
|
||||
Reference in New Issue
Block a user