Compare commits

..

5 Commits

Author SHA1 Message Date
Matt Johnston
52b6ae0d97 Don't remove ~ files on make clean (and find -type was wrong anyway)
--HG--
branch : libtommath
extra : convert_revision : 38ad9968c111d9364b88e0098afc3e8e794e67e3
2005-05-11 16:27:28 +00:00
Matt Johnston
6ab9858dec Update Makefile.in for dropbear
--HG--
branch : libtommath
extra : convert_revision : 11081ab8c4f56bc850c99ac6c7af31de7b4bca0d
2005-05-11 16:25:34 +00:00
Matt Johnston
0ac65f3f84 Bringing back the original ltc 0.35 makefile
--HG--
branch : libtommath
extra : convert_revision : 64da6ef5a06ae239a1d89a38481722bf787548e3
2005-05-11 16:23:24 +00:00
Matt Johnston
a89b26e778 propagate from branch 'au.asn.ucc.matt.ltm-orig' (head 7fa10cba9535de3461cedb14b877c24858826204)
to branch 'au.asn.ucc.matt.dropbear.ltm' (head fc26f60de0370ab0a281fa41a2d13fb17c9d90a8)

--HG--
branch : libtommath
extra : convert_revision : 1b78d65cf59dad1c913ea81f892dccece58c2a0d
2005-05-11 16:15:27 +00:00
Matt Johnston
180935d8cd Initial import of libtommath 0.35
--HG--
branch : libtommath-orig
extra : convert_revision : b14c94b945e5c61de5bfa1a6816e11ed55cb2911
2005-05-06 08:59:30 +00:00
80 changed files with 6694 additions and 4009 deletions

View File

@@ -2,6 +2,9 @@
#
#Tom St Denis
#version of library
VERSION=0.35
VPATH=@srcdir@
srcdir=@srcdir@
@@ -11,7 +14,7 @@ CFLAGS += -I$(srcdir)
#CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare
#for speed
#CFLAGS += -O3 -funroll-loops
#CFLAGS += -O3 -funroll-all-loops
#for size
#CFLAGS += -Os
@@ -22,13 +25,15 @@ CFLAGS += -I$(srcdir)
#debug
#CFLAGS += -g3
VERSION=0.32
#install as this user
USER=root
GROUP=root
default: libtommath.a
#default files to install
LIBNAME=libtommath.a
HEADERS=tommath.h
HEADERS=tommath.h tommath_class.h tommath_superclass.h
#LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath.
@@ -58,17 +63,18 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
$(RANLIB) libtommath.a
#make a profiled library (takes a while!!!)
#
# This will build the library with profile generation
@@ -93,19 +99,19 @@ profiled_single:
ranlib libtommath.a
install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH)
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
test: libtommath.a demo/demo.o
$(CC) demo/demo.o libtommath.a -o test
$(CC) $(CFLAGS) demo/demo.o libtommath.a -o test
mtest: test
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -s
cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest
timing: libtommath.a
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s
$(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest
# makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think]
docdvi: tommath.src
@@ -145,11 +151,11 @@ pretty:
perl pretty.build
clean:
-rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex `find . -type f | grep [~] | xargs` *.lo *.la
-rm -rf .libs
-cd etc && make clean
-cd pics && make clean
rm -f *.bat *.pdf *.o *.a *.obj *.lib *.exe *.dll etclib/*.o demo/demo.o test ltmtest mpitest mtest/mtest mtest/mtest.exe \
*.idx *.toc *.log *.aux *.dvi *.lof *.ind *.ilg *.ps *.log *.s mpi.c *.da *.dyn *.dpi tommath.tex *.lo *.la
rm -rf .libs
cd etc && make clean
cd pics && make clean
zipup: clean manual poster docs
perl gen.pl ; mv mpi.c pre_gen/ ; \

16
TODO Normal file
View File

@@ -0,0 +1,16 @@
things for book in order of importance...
- Fix up pseudo-code [only] for combas that are not consistent with source
- Start in chapter 3 [basics] and work up...
- re-write to prose [less abrupt]
- clean up pseudo code [spacing]
- more examples where appropriate and figures
Goal:
- Get sync done by mid January [roughly 8-12 hours work]
- Finish ch3-6 by end of January [roughly 12-16 hours of work]
- Finish ch7-end by mid Feb [roughly 20-24 hours of work].
Goal isn't "first edition" but merely cleaner to read.

29
bn.tex
View File

@@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{LibTomMath User Manual \\ v0.32}
\title{LibTomMath User Manual \\ v0.35}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been
@@ -263,12 +263,12 @@ are the pros and cons of LibTomMath by comparing it to the math routines from Gn
\begin{center}
\begin{tabular}{|l|c|c|l|}
\hline \textbf{Criteria} & \textbf{Pro} & \textbf{Con} & \textbf{Notes} \\
\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath $ = 76.04$ \\
\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath $ = 71.97$ \\
\hline Commented function prototypes & X && GnuPG function names are cryptic. \\
\hline Speed && X & LibTomMath is slower. \\
\hline Totally free & X & & GPL has unfavourable restrictions.\\
\hline Large function base & X & & GnuPG is barebones. \\
\hline Four modular reduction algorithms & X & & Faster modular exponentiation. \\
\hline Five modular reduction algorithms & X & & Faster modular exponentiation for a variety of moduli. \\
\hline Portable & X & & GnuPG requires configuration to build. \\
\hline
\end{tabular}
@@ -284,9 +284,12 @@ would require when working with large integers.
So it may feel tempting to just rip the math code out of GnuPG (or GnuMP where it was taken from originally) in your
own application but I think there are reasons not to. While LibTomMath is slower than libraries such as GnuMP it is
not normally significantly slower. On x86 machines the difference is normally a factor of two when performing modular
exponentiations.
exponentiations. It depends largely on the processor, compiler and the moduli being used.
Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern.
Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern. However,
on the other side of the coin LibTomMath offers you a totally free (public domain) well structured math library
that is very flexible, complete and performs well in resource contrained environments. Fast RSA for example can
be performed with as little as 8KB of ram for data (again depending on build options).
\chapter{Getting Started with LibTomMath}
\section{Building Programs}
@@ -809,7 +812,7 @@ mp\_int variables based on their digits only.
\index{mp\_cmp\_mag}
\begin{alltt}
int mp_cmp(mp_int * a, mp_int * b);
int mp_cmp_mag(mp_int * a, mp_int * b);
\end{alltt}
This will compare $a$ to $b$ placing $a$ to the left of $b$. This function cannot fail and will return one of the
three compare codes listed in figure \ref{fig:CMP}.
@@ -1220,12 +1223,13 @@ int mp_sqr (mp_int * a, mp_int * b);
\end{alltt}
Will square $a$ and store it in $b$. Like the case of multiplication there are four different squaring
algorithms all which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms.
algorithms all which can be called from mp\_sqr(). It is ideal to use mp\_sqr over mp\_mul when squaring terms because
of the speed difference.
\section{Tuning Polynomial Basis Routines}
Both of the Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that
the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectfully they require
the Comba and baseline algorithms use. At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectively they require
considerably less work. For example, a 10000-digit multiplication would take roughly 724,000 single precision
multiplications with Toom-Cook or 100,000,000 single precision multiplications with the standard Comba (a factor
of 138).
@@ -1297,14 +1301,14 @@ of $b$. This algorithm accepts an input $a$ of any range and is not limited by
\section{Barrett Reduction}
Barrett reduction is a generic optimized reduction algorithm that requires pre--computation to achieve
a decent speedup over straight division. First a $mu$ value must be precomputed with the following function.
a decent speedup over straight division. First a $\mu$ value must be precomputed with the following function.
\index{mp\_reduce\_setup}
\begin{alltt}
int mp_reduce_setup(mp_int *a, mp_int *b);
\end{alltt}
Given a modulus in $b$ this produces the required $mu$ value in $a$. For any given modulus this only has to
Given a modulus in $b$ this produces the required $\mu$ value in $a$. For any given modulus this only has to
be computed once. Modular reduction can now be performed with the following.
\index{mp\_reduce}
@@ -1312,7 +1316,7 @@ be computed once. Modular reduction can now be performed with the following.
int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
\end{alltt}
This will reduce $a$ in place modulo $b$ with the precomputed $mu$ value in $c$. $a$ must be in the range
This will reduce $a$ in place modulo $b$ with the precomputed $\mu$ value in $c$. $a$ must be in the range
$0 \le a < b^2$.
\begin{alltt}
@@ -1578,7 +1582,8 @@ will return $-2$.
This algorithm uses the ``Newton Approximation'' method and will converge on the correct root fairly quickly. Since
the algorithm requires raising $a$ to the power of $b$ it is not ideal to attempt to find roots for large
values of $b$. If particularly large roots are required then a factor method could be used instead. For example,
$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$.
$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$ or simply
$\left ( \left ( \left ( a^{1/2} \right )^{1/2} \right )^{1/2} \right )^{1/2}$
\chapter{Prime Numbers}
\section{Trial Division}

View File

@@ -21,8 +21,7 @@
* Based on slow invmod except this is optimized for the case where b is
* odd as per HAC Note 14.64 on pp. 610
*/
int
fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
{
mp_int x, y, u, v, B, D;
int res, neg;
@@ -39,20 +38,20 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
/* x == modulus, y == value to invert */
if ((res = mp_copy (b, &x)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* we need y = |a| */
if ((res = mp_abs (a, &y)) != MP_OKAY) {
goto __ERR;
if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
goto LBL_ERR;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
mp_set (&D, 1);
@@ -61,17 +60,17 @@ top:
while (mp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* 4.2 if B is odd then */
if (mp_isodd (&B) == 1) {
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* B = B/2 */
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -79,18 +78,18 @@ top:
while (mp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* 5.2 if D is odd then */
if (mp_isodd (&D) == 1) {
/* D = (D-x)/2 */
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* D = D/2 */
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -98,20 +97,20 @@ top:
if (mp_cmp (&u, &v) != MP_LT) {
/* u = u - v, B = B - D */
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
} else {
/* v - v - u, D = D - B */
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -125,21 +124,21 @@ top:
/* if v != 1 then there is no inverse */
if (mp_cmp_d (&v, 1) != MP_EQ) {
res = MP_VAL;
goto __ERR;
goto LBL_ERR;
}
/* b is now the inverse */
neg = a->sign;
while (D.sign == MP_NEG) {
if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
mp_exch (&D, c);
c->sign = neg;
res = MP_OKAY;
__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
return res;
}
#endif

View File

@@ -23,8 +23,7 @@
*
* Based on Algorithm 14.32 on pp.601 of HAC.
*/
int
fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
int ix, res, olduse;
mp_word W[MP_WARRAY];

View File

@@ -31,8 +31,7 @@
* Based on Algorithm 14.12 on pp.595 of HAC.
*
*/
int
fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int olduse, res, pa, ix, iz;
mp_digit W[MP_WARRAY];
@@ -50,7 +49,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* clear the carry */
_W = 0;
for (ix = 0; ix <= pa; ix++) {
for (ix = 0; ix < pa; ix++) {
int tx, ty;
int iy;
mp_digit *tmpx, *tmpy;
@@ -63,7 +62,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
tmpx = a->dp + tx;
tmpy = b->dp + ty;
/* this is the number of times the loop will iterrate, essentially its
/* this is the number of times the loop will iterrate, essentially
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(a->used-tx, ty+1);
@@ -80,14 +79,17 @@ fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */
olduse = c->used;
c->used = digs;
c->used = pa;
{
register mp_digit *tmpc;
tmpc = c->dp;
for (ix = 0; ix < digs; ix++) {
for (ix = 0; ix < pa+1; ix++) {
/* now extract the previous digit [below the carry] */
*tmpc++ = W[ix];
}

View File

@@ -24,8 +24,7 @@
*
* Based on Algorithm 14.12 on pp.595 of HAC.
*/
int
fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
int olduse, res, pa, ix, iz;
mp_digit W[MP_WARRAY];
@@ -42,7 +41,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* number of output digits to produce */
pa = a->used + b->used;
_W = 0;
for (ix = digs; ix <= pa; ix++) {
for (ix = digs; ix < pa; ix++) {
int tx, ty, iy;
mp_digit *tmpx, *tmpy;
@@ -70,6 +69,9 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
}
/* store final carry */
W[ix] = (mp_digit)(_W & MP_MASK);
/* setup dest */
olduse = c->used;

View File

@@ -15,33 +15,14 @@
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* fast squaring
*
* This is the comba method where the columns of the product
* are computed first then the carries are computed. This
* has the effect of making a very simple inner loop that
* is executed the most
*
* W2 represents the outer products and W the inner.
*
* A further optimizations is made because the inner
* products are of the form "A * B * 2". The *2 part does
* not need to be computed until the end which is good
* because 64-bit shifts are slow!
*
* Based on Algorithm 14.16 on pp.597 of HAC.
*
*/
/* the jist of squaring...
you do like mult except the offset of the tmpx [one that starts closer to zero]
can't equal the offset of tmpy. So basically you set up iy like before then you min it with
(ty-tx) so that it never happens. You double all those you add in the inner loop
* you do like mult except the offset of the tmpx [one that
* starts closer to zero] can't equal the offset of tmpy.
* So basically you set up iy like before then you min it with
* (ty-tx) so that it never happens. You double all those
* you add in the inner loop
After that loop you do the squares and add them in.
Remove W2 and don't memset W
*/
int fast_s_mp_sqr (mp_int * a, mp_int * b)
@@ -60,7 +41,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
/* number of output digits to produce */
W1 = 0;
for (ix = 0; ix <= pa; ix++) {
for (ix = 0; ix < pa; ix++) {
int tx, ty, iy;
mp_word _W;
mp_digit *tmpy;
@@ -76,7 +57,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
tmpx = a->dp + tx;
tmpy = a->dp + ty;
/* this is the number of times the loop will iterrate, essentially its
/* this is the number of times the loop will iterrate, essentially
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(a->used-tx, ty+1);
@@ -101,7 +82,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
}
/* store it */
W[ix] = _W;
W[ix] = (mp_digit)(_W & MP_MASK);
/* make next carry */
W1 = _W >> ((mp_word)DIGIT_BIT);

View File

@@ -49,23 +49,23 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
mp_set(&tq, 1);
n = mp_count_bits(a) - mp_count_bits(b);
if (((res = mp_copy(a, &ta)) != MP_OKAY) ||
((res = mp_copy(b, &tb)) != MP_OKAY) ||
if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
((res = mp_abs(b, &tb)) != MP_OKAY) ||
((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
goto __ERR;
goto LBL_ERR;
}
while (n-- >= 0) {
if (mp_cmp(&tb, &ta) != MP_GT) {
if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
goto __ERR;
goto LBL_ERR;
}
}
if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -74,13 +74,13 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
if (c != NULL) {
mp_exch(c, &q);
c->sign = n2;
c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
}
if (d != NULL) {
mp_exch(d, &ta);
d->sign = n;
d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
}
__ERR:
LBL_ERR:
mp_clear_multi(&ta, &tb, &tq, &q, NULL);
return res;
}
@@ -129,19 +129,19 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
q.used = a->used + 2;
if ((res = mp_init (&t1)) != MP_OKAY) {
goto __Q;
goto LBL_Q;
}
if ((res = mp_init (&t2)) != MP_OKAY) {
goto __T1;
goto LBL_T1;
}
if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
goto __T2;
goto LBL_T2;
}
if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
goto __X;
goto LBL_X;
}
/* fix the sign */
@@ -153,10 +153,10 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
if (norm < (int)(DIGIT_BIT-1)) {
norm = (DIGIT_BIT-1) - norm;
if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
} else {
norm = 0;
@@ -168,13 +168,13 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
goto __Y;
goto LBL_Y;
}
while (mp_cmp (&x, &y) != MP_LT) {
++(q.dp[n - t]);
if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
}
@@ -216,7 +216,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
t1.dp[1] = y.dp[t];
t1.used = 2;
if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
/* find right hand */
@@ -228,27 +228,27 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
if (x.sign == MP_NEG) {
if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
@@ -275,11 +275,11 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
res = MP_OKAY;
__Y:mp_clear (&y);
__X:mp_clear (&x);
__T2:mp_clear (&t2);
__T1:mp_clear (&t1);
__Q:mp_clear (&q);
LBL_Y:mp_clear (&y);
LBL_X:mp_clear (&x);
LBL_T2:mp_clear (&t2);
LBL_T1:mp_clear (&t1);
LBL_Q:mp_clear (&q);
return res;
}

View File

@@ -20,7 +20,7 @@
* Based on algorithm from the paper
*
* "Generating Efficient Primes for Discrete Log Cryptosystems"
* Chae Hoon Lim, Pil Loong Lee,
* Chae Hoon Lim, Pil Joong Lee,
* POSTECH Information Research Laboratories
*
* The modulus must be of a special format [see manual]

View File

@@ -61,25 +61,33 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
return err;
#else
/* no invmod */
return MP_VAL
return MP_VAL;
#endif
}
/* modified diminished radix reduction */
#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C)
if (mp_reduce_is_2k_l(P) == MP_YES) {
return s_mp_exptmod(G, X, P, Y, 1);
}
#endif
#ifdef BN_MP_DR_IS_MODULUS_C
/* is it a DR modulus? */
dr = mp_dr_is_modulus(P);
#else
/* default to no */
dr = 0;
#endif
#ifdef BN_MP_REDUCE_IS_2K_C
/* if not, is it a uDR modulus? */
/* if not, is it a unrestricted DR modulus? */
if (dr == 0) {
dr = mp_reduce_is_2k(P) << 1;
}
#endif
/* if the modulus is odd or dr != 0 use the fast method */
/* if the modulus is odd or dr != 0 use the montgomery method */
#ifdef BN_MP_EXPTMOD_FAST_C
if (mp_isodd (P) == 1 || dr != 0) {
return mp_exptmod_fast (G, X, P, Y, dr);
@@ -87,7 +95,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
#endif
#ifdef BN_S_MP_EXPTMOD_C
/* otherwise use the generic Barrett reduction technique */
return s_mp_exptmod (G, X, P, Y);
return s_mp_exptmod (G, X, P, Y, 0);
#else
/* no exptmod for evens */
return MP_VAL;

View File

@@ -29,8 +29,7 @@
#define TAB_SIZE 256
#endif
int
mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
{
mp_int M[TAB_SIZE], res;
mp_digit buf, mp;
@@ -88,11 +87,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
#ifdef BN_MP_MONTGOMERY_SETUP_C
/* now setup montgomery */
if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
goto __M;
goto LBL_M;
}
#else
err = MP_VAL;
goto __M;
goto LBL_M;
#endif
/* automatically pick the comba one if available (saves quite a few calls/ifs) */
@@ -108,7 +107,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
redux = mp_montgomery_reduce;
#else
err = MP_VAL;
goto __M;
goto LBL_M;
#endif
}
} else if (redmode == 1) {
@@ -118,24 +117,24 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
redux = mp_dr_reduce;
#else
err = MP_VAL;
goto __M;
goto LBL_M;
#endif
} else {
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
/* setup DR reduction for moduli of the form 2**k - b */
if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
goto __M;
goto LBL_M;
}
redux = mp_reduce_2k;
#else
err = MP_VAL;
goto __M;
goto LBL_M;
#endif
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __M;
goto LBL_M;
}
/* create M table
@@ -149,45 +148,45 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
/* now we need R mod m */
if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
#else
err = MP_VAL;
goto __RES;
goto LBL_RES;
#endif
/* now set M[1] to G * R mod m */
if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
} else {
mp_set(&res, 1);
if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
for (x = 0; x < (winsize - 1); x++) {
if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
/* create upper table */
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
@@ -227,10 +226,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
continue;
}
@@ -244,19 +243,19 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
/* empty window and reset */
@@ -271,10 +270,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
/* get next bit of the window */
@@ -282,10 +281,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = redux (&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
}
@@ -299,15 +298,15 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
* of R.
*/
if ((err = redux(&res, P, mp)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
}
/* swap res with Y */
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__M:
LBL_RES:mp_clear (&res);
LBL_M:
mp_clear(&M[1]);
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
mp_clear (&M[x]);

View File

@@ -59,6 +59,13 @@ int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; }
}
/* make sure U3 >= 0 */
if (u3.sign == MP_NEG) {
mp_neg(&u1, &u1);
mp_neg(&u2, &u2);
mp_neg(&u3, &u3);
}
/* copy result out */
if (U1 != NULL) { mp_exch(U1, &u1); }
if (U2 != NULL) { mp_exch(U2, &u2); }

View File

@@ -43,7 +43,7 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
}
if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
goto __U;
goto LBL_U;
}
/* must be positive for the remainder of the algorithm */
@@ -57,24 +57,24 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
if (k > 0) {
/* divide the power of two out */
if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
}
/* divide any remaining factors of two out */
if (u_lsb != k) {
if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
}
if (v_lsb != k) {
if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
}
@@ -87,23 +87,23 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
/* subtract smallest from largest */
if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
/* Divide out all factors of two */
if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
}
/* multiply by 2**k which we divided out at the beginning */
if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
c->sign = MP_ZPOS;
res = MP_OKAY;
__V:mp_clear (&u);
__U:mp_clear (&v);
LBL_V:mp_clear (&u);
LBL_U:mp_clear (&v);
return res;
}
#endif

View File

@@ -33,25 +33,25 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
}
/* x = a, y = b */
if ((res = mp_copy (a, &x)) != MP_OKAY) {
goto __ERR;
if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
goto LBL_ERR;
}
if ((res = mp_copy (b, &y)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* 2. [modified] if x,y are both even then return an error! */
if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
res = MP_VAL;
goto __ERR;
goto LBL_ERR;
}
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
if ((res = mp_copy (&x, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_copy (&y, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
mp_set (&A, 1);
mp_set (&D, 1);
@@ -61,24 +61,24 @@ top:
while (mp_iseven (&u) == 1) {
/* 4.1 u = u/2 */
if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* 4.2 if A or B is odd then */
if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
/* A = (A+y)/2, B = (B-x)/2 */
if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* A = A/2, B = B/2 */
if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -86,24 +86,24 @@ top:
while (mp_iseven (&v) == 1) {
/* 5.1 v = v/2 */
if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* 5.2 if C or D is odd then */
if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
/* C = (C+y)/2, D = (D-x)/2 */
if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* C = C/2, D = D/2 */
if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -111,28 +111,28 @@ top:
if (mp_cmp (&u, &v) != MP_LT) {
/* u = u - v, A = A - C, B = B - D */
if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
} else {
/* v - v - u, C = C - A, D = D - B */
if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
@@ -145,27 +145,27 @@ top:
/* if v != 1 then there is no inverse */
if (mp_cmp_d (&v, 1) != MP_EQ) {
res = MP_VAL;
goto __ERR;
goto LBL_ERR;
}
/* if its too low */
while (mp_cmp_d(&C, 0) == MP_LT) {
if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* too big */
while (mp_cmp_mag(&C, b) != MP_LT) {
if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
}
/* C is now the inverse */
mp_exch (&C, c);
res = MP_OKAY;
__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
return res;
}
#endif

View File

@@ -50,13 +50,13 @@ int mp_jacobi (mp_int * a, mp_int * p, int *c)
}
if ((res = mp_init (&p1)) != MP_OKAY) {
goto __A1;
goto LBL_A1;
}
/* divide out larger power of two */
k = mp_cnt_lsb(&a1);
if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
goto __P1;
goto LBL_P1;
}
/* step 4. if e is even set s=1 */
@@ -84,18 +84,18 @@ int mp_jacobi (mp_int * a, mp_int * p, int *c)
} else {
/* n1 = n mod a1 */
if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
goto __P1;
goto LBL_P1;
}
if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
goto __P1;
goto LBL_P1;
}
*c = s * r;
}
/* done */
res = MP_OKAY;
__P1:mp_clear (&p1);
__A1:mp_clear (&a1);
LBL_P1:mp_clear (&p1);
LBL_A1:mp_clear (&a1);
return res;
}
#endif

View File

@@ -28,20 +28,20 @@ int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
/* t1 = get the GCD of the two inputs */
if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
goto __T;
goto LBL_T;
}
/* divide the smallest by the GCD */
if (mp_cmp_mag(a, b) == MP_LT) {
/* store quotient in t2 such that t2 * b is the LCM */
if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
goto __T;
goto LBL_T;
}
res = mp_mul(b, &t2, c);
} else {
/* store quotient in t2 such that t2 * a is the LCM */
if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
goto __T;
goto LBL_T;
}
res = mp_mul(a, &t2, c);
}
@@ -49,7 +49,7 @@ int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
/* fix the sign to positive */
c->sign = MP_ZPOS;
__T:
LBL_T:
mp_clear_multi (&t1, &t2, NULL);
return res;
}

View File

@@ -28,7 +28,7 @@ mp_mod_2d (mp_int * a, int b, mp_int * c)
}
/* if the modulus is larger than the value than return */
if (b > (int) (a->used * DIGIT_BIT)) {
if (b >= (int) (a->used * DIGIT_BIT)) {
res = mp_copy (a, c);
return res;
}

View File

@@ -28,7 +28,6 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
/* how many bits of last digit does b use */
bits = mp_count_bits (b) % DIGIT_BIT;
if (b->used > 1) {
if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
return res;

View File

@@ -57,8 +57,9 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
}
/* store final carry [if any] */
/* store final carry [if any] and increment ix offset */
*tmpc++ = u;
++ix;
/* now zero digits above the top */
while (ix++ < olduse) {

View File

@@ -40,11 +40,11 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
}
if ((res = mp_init (&t2)) != MP_OKAY) {
goto __T1;
goto LBL_T1;
}
if ((res = mp_init (&t3)) != MP_OKAY) {
goto __T2;
goto LBL_T2;
}
/* if a is negative fudge the sign but keep track */
@@ -57,52 +57,52 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
do {
/* t1 = t2 */
if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
/* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
/* t3 = t1**(b-1) */
if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
/* numerator */
/* t2 = t1**b */
if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
/* t2 = t1**b - a */
if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
/* denominator */
/* t3 = t1**(b-1) * b */
if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
/* t3 = (t1**b - a)/(b * t1**(b-1)) */
if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
} while (mp_cmp (&t1, &t2) != MP_EQ);
/* result can be off by a few so check */
for (;;) {
if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
if (mp_cmp (&t2, a) == MP_GT) {
if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
goto __T3;
goto LBL_T3;
}
} else {
break;
@@ -120,9 +120,9 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
res = MP_OKAY;
__T3:mp_clear (&t3);
__T2:mp_clear (&t2);
__T1:mp_clear (&t1);
LBL_T3:mp_clear (&t3);
LBL_T2:mp_clear (&t2);
LBL_T1:mp_clear (&t1);
return res;
}
#endif

View File

@@ -19,12 +19,18 @@
int mp_neg (mp_int * a, mp_int * b)
{
int res;
if ((res = mp_copy (a, b)) != MP_OKAY) {
return res;
if (a != b) {
if ((res = mp_copy (a, b)) != MP_OKAY) {
return res;
}
}
if (mp_iszero(b) != MP_YES) {
b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
} else {
b->sign = MP_ZPOS;
}
return MP_OKAY;
}
#endif

View File

@@ -43,7 +43,7 @@ int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
/* compute t = b**a mod a */
if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
goto __T;
goto LBL_T;
}
/* is it equal to b? */
@@ -52,7 +52,7 @@ int mp_prime_fermat (mp_int * a, mp_int * b, int *result)
}
err = MP_OKAY;
__T:mp_clear (&t);
LBL_T:mp_clear (&t);
return err;
}
#endif

View File

@@ -29,8 +29,8 @@ int mp_prime_is_divisible (mp_int * a, int *result)
*result = MP_NO;
for (ix = 0; ix < PRIME_SIZE; ix++) {
/* what is a mod __prime_tab[ix] */
if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
/* what is a mod LBL_prime_tab[ix] */
if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
return err;
}

View File

@@ -37,7 +37,7 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
/* is the input equal to one of the primes in the table? */
for (ix = 0; ix < PRIME_SIZE; ix++) {
if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
*result = 1;
return MP_OKAY;
}
@@ -60,20 +60,20 @@ int mp_prime_is_prime (mp_int * a, int t, int *result)
for (ix = 0; ix < t; ix++) {
/* set the prime */
mp_set (&b, __prime_tab[ix]);
mp_set (&b, ltm_prime_tab[ix]);
if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
goto __B;
goto LBL_B;
}
if (res == MP_NO) {
goto __B;
goto LBL_B;
}
}
/* passed the test */
*result = MP_YES;
__B:mp_clear (&b);
LBL_B:mp_clear (&b);
return err;
}
#endif

View File

@@ -40,12 +40,12 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
return err;
}
if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
goto __N1;
goto LBL_N1;
}
/* set 2**s * r = n1 */
if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
goto __N1;
goto LBL_N1;
}
/* count the number of least significant bits
@@ -55,15 +55,15 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
/* now divide n - 1 by 2**s */
if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
goto __R;
goto LBL_R;
}
/* compute y = b**r mod a */
if ((err = mp_init (&y)) != MP_OKAY) {
goto __R;
goto LBL_R;
}
if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
/* if y != 1 and y != n1 do */
@@ -72,12 +72,12 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
/* while j <= s-1 and y != n1 */
while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
/* if y == 1 then composite */
if (mp_cmp_d (&y, 1) == MP_EQ) {
goto __Y;
goto LBL_Y;
}
++j;
@@ -85,15 +85,15 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
/* if y != n1 then composite */
if (mp_cmp (&y, &n1) != MP_EQ) {
goto __Y;
goto LBL_Y;
}
}
/* probably prime now */
*result = MP_YES;
__Y:mp_clear (&y);
__R:mp_clear (&r);
__N1:mp_clear (&n1);
LBL_Y:mp_clear (&y);
LBL_R:mp_clear (&r);
LBL_N1:mp_clear (&n1);
return err;
}
#endif

View File

@@ -35,10 +35,10 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
a->sign = MP_ZPOS;
/* simple algo if a is less than the largest prime in the table */
if (mp_cmp_d(a, __prime_tab[PRIME_SIZE-1]) == MP_LT) {
if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
/* find which prime it is bigger than */
for (x = PRIME_SIZE - 2; x >= 0; x--) {
if (mp_cmp_d(a, __prime_tab[x]) != MP_LT) {
if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
if (bbs_style == 1) {
/* ok we found a prime smaller or
* equal [so the next is larger]
@@ -46,17 +46,17 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
* however, the prime must be
* congruent to 3 mod 4
*/
if ((__prime_tab[x + 1] & 3) != 3) {
if ((ltm_prime_tab[x + 1] & 3) != 3) {
/* scan upwards for a prime congruent to 3 mod 4 */
for (y = x + 1; y < PRIME_SIZE; y++) {
if ((__prime_tab[y] & 3) == 3) {
mp_set(a, __prime_tab[y]);
if ((ltm_prime_tab[y] & 3) == 3) {
mp_set(a, ltm_prime_tab[y]);
return MP_OKAY;
}
}
}
} else {
mp_set(a, __prime_tab[x + 1]);
mp_set(a, ltm_prime_tab[x + 1]);
return MP_OKAY;
}
}
@@ -94,7 +94,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
/* generate the restable */
for (x = 1; x < PRIME_SIZE; x++) {
if ((err = mp_mod_d(a, __prime_tab[x], res_tab + x)) != MP_OKAY) {
if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
return err;
}
}
@@ -120,8 +120,8 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
res_tab[x] += kstep;
/* subtract the modulus [instead of using division] */
if (res_tab[x] >= __prime_tab[x]) {
res_tab[x] -= __prime_tab[x];
if (res_tab[x] >= ltm_prime_tab[x]) {
res_tab[x] -= ltm_prime_tab[x];
}
/* set flag if zero */
@@ -133,7 +133,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
/* add the step */
if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
/* if didn't pass sieve and step == MAX then skip test */
@@ -143,9 +143,9 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
/* is this prime? */
for (x = 0; x < t; x++) {
mp_set(&b, __prime_tab[t]);
mp_set(&b, ltm_prime_tab[t]);
if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
goto __ERR;
goto LBL_ERR;
}
if (res == MP_NO) {
break;
@@ -158,7 +158,7 @@ int mp_prime_next_prime(mp_int *a, int t, int bbs_style)
}
err = MP_OKAY;
__ERR:
LBL_ERR:
mp_clear(&b);
return err;
}

View File

@@ -47,7 +47,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
}
/* calc the byte size */
bsize = (size>>3)+(size&7?1:0);
bsize = (size>>3) + ((size&7)?1:0);
/* we need a buffer of bsize bytes */
tmp = OPT_CAST(unsigned char) XMALLOC(bsize);
@@ -56,19 +56,19 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
}
/* calc the maskAND value for the MSbyte*/
maskAND = 0xFF >> (8 - (size & 7));
maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
/* calc the maskOR_msb */
maskOR_msb = 0;
maskOR_msb_offset = (size - 2) >> 3;
maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
if (flags & LTM_PRIME_2MSB_ON) {
maskOR_msb |= 1 << ((size - 2) & 7);
} else if (flags & LTM_PRIME_2MSB_OFF) {
maskAND &= ~(1 << ((size - 2) & 7));
}
}
/* get the maskOR_lsb */
maskOR_lsb = 0;
maskOR_lsb = 1;
if (flags & LTM_PRIME_BBS) {
maskOR_lsb |= 3;
}

View File

@@ -35,22 +35,29 @@ int mp_radix_size (mp_int * a, int radix, int *size)
return MP_VAL;
}
/* init a copy of the input */
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
return res;
if (mp_iszero(a) == MP_YES) {
*size = 2;
return MP_OKAY;
}
/* digs is the digit count */
digs = 0;
/* if it's negative add one for the sign */
if (t.sign == MP_NEG) {
if (a->sign == MP_NEG) {
++digs;
t.sign = MP_ZPOS;
}
/* init a copy of the input */
if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
return res;
}
/* force temp to positive */
t.sign = MP_ZPOS;
/* fetch out all of the digits */
while (mp_iszero (&t) == 0) {
while (mp_iszero (&t) == MP_NO) {
if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
mp_clear (&t);
return res;

View File

@@ -29,14 +29,14 @@ mp_rand (mp_int * a, int digits)
/* first place a random non-zero digit */
do {
d = ((mp_digit) abs (rand ()));
d = ((mp_digit) abs (rand ())) & MP_MASK;
} while (d == 0);
if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
return res;
}
while (digits-- > 0) {
while (--digits > 0) {
if ((res = mp_lshd (a, 1)) != MP_OKAY) {
return res;
}

View File

@@ -16,7 +16,7 @@
*/
/* read a string [ASCII] in a given radix */
int mp_read_radix (mp_int * a, char *str, int radix)
int mp_read_radix (mp_int * a, const char *str, int radix)
{
int y, res, neg;
char ch;

View File

@@ -19,8 +19,7 @@
* precomputed via mp_reduce_setup.
* From HAC pp.604 Algorithm 14.42
*/
int
mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
{
mp_int q;
int res, um = m->used;
@@ -40,11 +39,11 @@ mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
}
} else {
#ifdef BN_S_MP_MUL_HIGH_DIGS_C
if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
goto CLEANUP;
}
#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
goto CLEANUP;
}
#else

View File

@@ -16,8 +16,7 @@
*/
/* reduces a modulo n where n is of the form 2**p - d */
int
mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
{
mp_int q;
int p, res;

58
bn_mp_reduce_2k_l.c Normal file
View File

@@ -0,0 +1,58 @@
#include <tommath.h>
#ifdef BN_MP_REDUCE_2K_L_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* reduces a modulo n where n is of the form 2**p - d
This differs from reduce_2k since "d" can be larger
than a single digit.
*/
int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
{
mp_int q;
int p, res;
if ((res = mp_init(&q)) != MP_OKAY) {
return res;
}
p = mp_count_bits(n);
top:
/* q = a/2**p, a = a mod 2**p */
if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
goto ERR;
}
/* q = q * d */
if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
goto ERR;
}
/* a = a + q */
if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
goto ERR;
}
if (mp_cmp_mag(a, n) != MP_LT) {
s_mp_sub(a, n, a);
goto top;
}
ERR:
mp_clear(&q);
return res;
}
#endif

View File

@@ -16,8 +16,7 @@
*/
/* determines the setup value */
int
mp_reduce_2k_setup(mp_int *a, mp_digit *d)
int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
{
int res, p;
mp_int tmp;

40
bn_mp_reduce_2k_setup_l.c Normal file
View File

@@ -0,0 +1,40 @@
#include <tommath.h>
#ifdef BN_MP_REDUCE_2K_SETUP_L_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* determines the setup value */
int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
{
int res;
mp_int tmp;
if ((res = mp_init(&tmp)) != MP_OKAY) {
return res;
}
if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
goto ERR;
}
if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
goto ERR;
}
ERR:
mp_clear(&tmp);
return res;
}
#endif

View File

@@ -22,9 +22,9 @@ int mp_reduce_is_2k(mp_int *a)
mp_digit iz;
if (a->used == 0) {
return 0;
return MP_NO;
} else if (a->used == 1) {
return 1;
return MP_YES;
} else if (a->used > 1) {
iy = mp_count_bits(a);
iz = 1;
@@ -33,7 +33,7 @@ int mp_reduce_is_2k(mp_int *a)
/* Test every bit from the second digit up, must be 1 */
for (ix = DIGIT_BIT; ix < iy; ix++) {
if ((a->dp[iw] & iz) == 0) {
return 0;
return MP_NO;
}
iz <<= 1;
if (iz > (mp_digit)MP_MASK) {
@@ -42,7 +42,7 @@ int mp_reduce_is_2k(mp_int *a)
}
}
}
return 1;
return MP_YES;
}
#endif

40
bn_mp_reduce_is_2k_l.c Normal file
View File

@@ -0,0 +1,40 @@
#include <tommath.h>
#ifdef BN_MP_REDUCE_IS_2K_L_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* determines if reduce_2k_l can be used */
int mp_reduce_is_2k_l(mp_int *a)
{
int ix, iy;
if (a->used == 0) {
return MP_NO;
} else if (a->used == 1) {
return MP_YES;
} else if (a->used > 1) {
/* if more than half of the digits are -1 we're sold */
for (iy = ix = 0; ix < a->used; ix++) {
if (a->dp[ix] == MP_MASK) {
++iy;
}
}
return (iy >= (a->used/2)) ? MP_YES : MP_NO;
}
return MP_NO;
}
#endif

View File

@@ -16,8 +16,7 @@
*/
/* store in signed [big endian] format */
int
mp_to_signed_bin (mp_int * a, unsigned char *b)
int mp_to_signed_bin (mp_int * a, unsigned char *b)
{
int res;

27
bn_mp_to_signed_bin_n.c Normal file
View File

@@ -0,0 +1,27 @@
#include <tommath.h>
#ifdef BN_MP_TO_SIGNED_BIN_N_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* store in signed [big endian] format */
int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
{
if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
return MP_VAL;
}
*outlen = mp_signed_bin_size(a);
return mp_to_signed_bin(a, b);
}
#endif

View File

@@ -16,8 +16,7 @@
*/
/* store in unsigned [big endian] format */
int
mp_to_unsigned_bin (mp_int * a, unsigned char *b)
int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
{
int x, res;
mp_int t;

27
bn_mp_to_unsigned_bin_n.c Normal file
View File

@@ -0,0 +1,27 @@
#include <tommath.h>
#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
*
* LibTomMath is a library that provides multiple-precision
* integer arithmetic as well as number theoretic functionality.
*
* The library was designed directly after the MPI library by
* Michael Fromberger but has been written from scratch with
* additional optimizations in place.
*
* The library is free for all purposes without any express
* guarantee it works.
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
/* store in unsigned [big endian] format */
int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
{
if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
return MP_VAL;
}
*outlen = mp_unsigned_bin_size(a);
return mp_to_unsigned_bin(a, b);
}
#endif

View File

@@ -17,9 +17,10 @@
/* multiplication using the Toom-Cook 3-way algorithm
*
* Much more complicated than Karatsuba but has a lower asymptotic running time of
* O(N**1.464). This algorithm is only particularly useful on VERY large
* inputs (we're talking 1000s of digits here...).
* Much more complicated than Karatsuba but has a lower
* asymptotic running time of O(N**1.464). This algorithm is
* only particularly useful on VERY large inputs
* (we're talking 1000s of digits here...).
*/
int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
{

View File

@@ -16,8 +16,7 @@
*/
/* get the size for an unsigned equivalent */
int
mp_unsigned_bin_size (mp_int * a)
int mp_unsigned_bin_size (mp_int * a)
{
int size = mp_count_bits (a);
return (size / 8 + ((size & 7) != 0 ? 1 : 0));

View File

@@ -37,7 +37,7 @@ mp_xor (mp_int * a, mp_int * b, mp_int * c)
}
for (ix = 0; ix < px; ix++) {
t.dp[ix] ^= x->dp[ix];
}
mp_clamp (&t);
mp_exch (c, &t);

View File

@@ -16,11 +16,17 @@
*/
/* set to zero */
void
mp_zero (mp_int * a)
void mp_zero (mp_int * a)
{
int n;
mp_digit *tmp;
a->sign = MP_ZPOS;
a->used = 0;
memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
tmp = a->dp;
for (n = 0; n < a->alloc; n++) {
*tmp++ = 0;
}
}
#endif

View File

@@ -14,7 +14,7 @@
*
* Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
*/
const mp_digit __prime_tab[] = {
const mp_digit ltm_prime_tab[] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,

View File

@@ -21,11 +21,12 @@
#define TAB_SIZE 256
#endif
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
{
mp_int M[TAB_SIZE], res, mu;
mp_digit buf;
int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
int (*redux)(mp_int*,mp_int*,mp_int*);
/* find window size */
x = mp_count_bits (X);
@@ -70,11 +71,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* create mu, used for Barrett reduction */
if ((err = mp_init (&mu)) != MP_OKAY) {
goto __M;
}
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto __MU;
goto LBL_M;
}
if (redmode == 0) {
if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
goto LBL_MU;
}
redux = mp_reduce;
} else {
if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
goto LBL_MU;
}
redux = mp_reduce_2k_l;
}
/* create M table
*
@@ -85,23 +95,26 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
* computed though accept for M[0] and M[1]
*/
if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
/* compute the value at M[1<<(winsize-1)] by squaring
* M[1] (winsize-1) times
*/
if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
for (x = 0; x < (winsize - 1); x++) {
/* square it */
if ((err = mp_sqr (&M[1 << (winsize - 1)],
&M[1 << (winsize - 1)])) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto __MU;
/* reduce modulo P */
if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
goto LBL_MU;
}
}
@@ -110,16 +123,16 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
*/
for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
goto __MU;
if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
goto LBL_MU;
}
}
/* setup result */
if ((err = mp_init (&res)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
mp_set (&res, 1);
@@ -159,10 +172,10 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* if the bit is zero and mode == 1 then we square */
if (mode == 1 && y == 0) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
goto LBL_RES;
}
continue;
}
@@ -176,19 +189,19 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* square first */
for (x = 0; x < winsize; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
goto LBL_RES;
}
}
/* then multiply */
if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
goto LBL_RES;
}
/* empty window and reset */
@@ -203,20 +216,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
/* square then multiply if the bit is set */
for (x = 0; x < bitcpy; x++) {
if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
goto LBL_RES;
}
bitbuf <<= 1;
if ((bitbuf & (1 << winsize)) != 0) {
/* then multiply */
if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
goto __RES;
goto LBL_RES;
}
if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
goto __RES;
if ((err = redux (&res, P, &mu)) != MP_OKAY) {
goto LBL_RES;
}
}
}
@@ -224,9 +237,9 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
mp_exch (&res, Y);
err = MP_OKAY;
__RES:mp_clear (&res);
__MU:mp_clear (&mu);
__M:
LBL_RES:mp_clear (&res);
LBL_MU:mp_clear (&mu);
LBL_M:
mp_clear(&M[1]);
for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
mp_clear (&M[x]);

View File

@@ -19,8 +19,7 @@
* HAC pp. 595, Algorithm 14.12 Modified so you can control how
* many digits of output are created.
*/
int
s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
mp_int t;
int res, pa, pb, ix, iy;

View File

@@ -16,8 +16,7 @@
*/
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
int
s_mp_sqr (mp_int * a, mp_int * b)
int s_mp_sqr (mp_int * a, mp_int * b)
{
mp_int t;
int res, ix, iy, pa;

View File

@@ -20,11 +20,12 @@
CPU /Compiler /MUL CUTOFF/SQR CUTOFF
-------------------------------------------------------------
Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34
*/
int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 128, /* Min. number of digits before Karatsuba squaring is used. */
int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */
KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */
TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */
TOOM_SQR_CUTOFF = 400;

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,35 @@
March 12th, 2005
v0.35 -- Stupid XOR function missing line again... oops.
-- Fixed bug in invmod not handling negative inputs correctly [Wolfgang Ehrhardt]
-- Made exteuclid always give positive u3 output...[ Wolfgang Ehrhardt ]
-- [Wolfgang Ehrhardt] Suggested a fix for mp_reduce() which avoided underruns. ;-)
-- mp_rand() would emit one too many digits and it was possible to get a 0 out of it ... oops
-- Added montgomery to the testing to make sure it handles 1..10 digit moduli correctly
-- Fixed bug in comba that would lead to possible erroneous outputs when "pa < digs"
-- Fixed bug in mp_toradix_size for "0" [Kevin Kenny]
-- Updated chapters 1-5 of the textbook ;-) It now talks about the new comba code!
February 12th, 2005
v0.34 -- Fixed two more small errors in mp_prime_random_ex()
-- Fixed overflow in mp_mul_d() [Kevin Kenny]
-- Added mp_to_(un)signed_bin_n() functions which do bounds checking for ya [and report the size]
-- Added "large" diminished radix support. Speeds up things like DSA where the moduli is of the form 2^k - P for some P < 2^(k/2) or so
Actually is faster than Montgomery on my AMD64 (and probably much faster on a P4)
-- Updated the manual a bit
-- Ok so I haven't done the textbook work yet... My current freelance gig has landed me in France till the
end of Feb/05. Once I get back I'll have tons of free time and I plan to go to town on the book.
As of this release the API will freeze. At least until the book catches up with all the changes. I welcome
bug reports but new algorithms will have to wait.
December 23rd, 2004
v0.33 -- Fixed "small" variant for mp_div() which would munge with negative dividends...
-- Fixed bug in mp_prime_random_ex() which would set the most significant byte to zero when
no special flags were set
-- Fixed overflow [minor] bug in fast_s_mp_sqr()
-- Made the makefiles easier to configure the group/user that ltm will install as
-- Fixed "final carry" bug in comba multipliers. (Volkan Ceylan)
-- Matt Johnston pointed out a missing semi-colon in mp_exptmod
October 29th, 2004
v0.32 -- Added "makefile.shared" for shared object support
-- Added more to the build options/configs in the manual

File diff suppressed because it is too large Load Diff

View File

@@ -11,15 +11,16 @@ ulong64 _tt;
#endif
void ndraw(mp_int *a, char *name)
void ndraw(mp_int * a, char *name)
{
char buf[4096];
printf("%s: ", name);
mp_toradix(a, buf, 64);
printf("%s\n", buf);
}
static void draw(mp_int *a)
static void draw(mp_int * a)
{
ndraw(a, "");
}
@@ -38,40 +39,39 @@ int lbit(void)
}
}
#if defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#ifdef __i386__
ulong64 a;
__asm__ __volatile__ ("rdtsc ":"=A" (a));
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
static ulong64 TIMFUNC(void)
{
#if defined __GNUC__
#if defined(__i386__) || defined(__x86_64__)
unsigned long long a;
__asm__ __volatile__("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::
"m"(a):"%eax", "%edx");
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
while (__builtin_expect((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#else
#define TIMFUNC clock
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg(3116);
#else
#error need rdtsc function for this build
#endif
}
#define DO(x) x; x;
//#define DO4(x) DO2(x); DO2(x);
@@ -81,7 +81,7 @@ static ulong64 TIMFUNC (void)
int main(void)
{
ulong64 tt, gg, CLK_PER_SEC;
FILE *log, *logb, *logc;
FILE *log, *logb, *logc, *logd;
mp_int a, b, c, d, e, f;
int n, cnt, ix, old_kara_m, old_kara_s;
unsigned rr;
@@ -94,168 +94,191 @@ int main(void)
mp_init(&f);
srand(time(NULL));
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
/* temp. turn off TOOM */
TOOM_MUL_CUTOFF = TOOM_SQR_CUTOFF = 100000;
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_add(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sub(&a,&b,&c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100000);
printf("CLK_PER_SEC == %llu\n", CLK_PER_SEC);
goto exptmod;
log = fopen("logs/add.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_add(&a, &b, &c));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 100000);
printf("Adding\t\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
fflush(log);
}
fclose(log);
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt); fflush(log);
}
fclose(log);
log = fopen("logs/sub.log", "w");
for (cnt = 8; cnt <= 128; cnt += 8) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sub(&a, &b, &c));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 100000);
printf("Subtracting\t\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
fflush(log);
}
fclose(log);
/* do mult/square twice, first without karatsuba and second with */
multtest:
old_kara_m = KARATSUBA_MUL_CUTOFF;
old_kara_s = KARATSUBA_SQR_CUTOFF;
for (ix = 0; ix < 1; ix++) {
printf("With%s Karatsuba\n", (ix==0)?"out":"");
for (ix = 0; ix < 2; ix++) {
printf("With%s Karatsuba\n", (ix == 0) ? "out" : "");
KARATSUBA_MUL_CUTOFF = (ix==0)?9999:old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix==0)?9999:old_kara_s;
KARATSUBA_MUL_CUTOFF = (ix == 0) ? 9999 : old_kara_m;
KARATSUBA_SQR_CUTOFF = (ix == 0) ? 9999 : old_kara_s;
log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w");
for (cnt = 4; cnt <= 288; cnt += 2) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_mul(&a, &b, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
log = fopen((ix == 0) ? "logs/mult.log" : "logs/mult_kara.log", "w");
for (cnt = 4; cnt <= 10240 / DIGIT_BIT; cnt += 2) {
SLEEP;
mp_rand(&a, cnt);
mp_rand(&b, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_mul(&a, &b, &c));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 100);
printf("Multiplying\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt);
fflush(log);
}
fclose(log);
log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w");
for (cnt = 4; cnt <= 288; cnt += 2) {
SLEEP;
mp_rand(&a, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sqr(&a, &b));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 100);
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt); fflush(log);
log = fopen((ix == 0) ? "logs/sqr.log" : "logs/sqr_kara.log", "w");
for (cnt = 4; cnt <= 10240 / DIGIT_BIT; cnt += 2) {
SLEEP;
mp_rand(&a, cnt);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_sqr(&a, &b));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 100);
printf("Squaring\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(log, "%d %9llu\n", mp_count_bits(&a), tt);
fflush(log);
}
fclose(log);
}
exptmod:
{
{
char *primes[] = {
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* 2K large moduli */
"179769313486231590772930519078902473361797697894230657273430081157732675805500963132708477322407536021120113879871393357658789768814416622492847430639474124377767893424865485276302219601246094119453082952085005768838150682342462881473913110540827237163350510684586239334100047359817950870678242457666208137217",
"32317006071311007300714876688669951960444102669715484032130345427524655138867890893197201411522913463688717960921898019494119559150490921095088152386448283120630877367300996091750197750389652106796057638384067568276792218642619756161838094338476170470581645852036305042887575891541065808607552399123930385521914333389668342420684974786564569494856176035326322058077805659331026192708460314150258592864177116725943603718461857357598351152301645904403697613233287231227125684710820209725157101726931323469678542580656697935045997268352998638099733077152121140120031150424541696791951097529546801429027668869927491725169",
"1044388881413152506691752710716624382579964249047383780384233483283953907971557456848826811934997558340890106714439262837987573438185793607263236087851365277945956976543709998340361590134383718314428070011855946226376318839397712745672334684344586617496807908705803704071284048740118609114467977783598029006686938976881787785946905630190260940599579453432823469303026696443059025015972399867714215541693835559885291486318237914434496734087811872639496475100189041349008417061675093668333850551032972088269550769983616369411933015213796825837188091833656751221318492846368125550225998300412344784862595674492194617023806505913245610825731835380087608622102834270197698202313169017678006675195485079921636419370285375124784014907159135459982790513399611551794271106831134090584272884279791554849782954323534517065223269061394905987693002122963395687782878948440616007412945674919823050571642377154816321380631045902916136926708342856440730447899971901781465763473223850267253059899795996090799469201774624817718449867455659250178329070473119433165550807568221846571746373296884912819520317457002440926616910874148385078411929804522981857338977648103126085902995208257421855249796721729039744118165938433694823325696642096892124547425283",
/* 2K moduli mersenne primes */
"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
"531137992816767098689588206552468627329593117727031923199444138200403559860852242739162502265229285668889329486246501015346579337652707239409519978766587351943831270835393219031728127",
"10407932194664399081925240327364085538615262247266704805319112350403608059673360298012239441732324184842421613954281007791383566248323464908139906605677320762924129509389220345773183349661583550472959420547689811211693677147548478866962501384438260291732348885311160828538416585028255604666224831890918801847068222203140521026698435488732958028878050869736186900714720710555703168729087",
"1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007",
"259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071",
"190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* DR moduli */
"14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079",
"101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039",
"736335108039604595805923406147184530889923370574768772191969612422073040099331944991573923112581267542507986451953227192970402893063850485730703075899286013451337291468249027691733891486704001513279827771740183629161065194874727962517148100775228363421083691764065477590823919364012917984605619526140821797602431",
"38564998830736521417281865696453025806593491967131023221754800625044118265468851210705360385717536794615180260494208076605798671660719333199513807806252394423283413430106003596332513246682903994829528690198205120921557533726473585751382193953592127439965050261476810842071573684505878854588706623484573925925903505747545471088867712185004135201289273405614415899438276535626346098904241020877974002916168099951885406379295536200413493190419727789712076165162175783",
"542189391331696172661670440619180536749994166415993334151601745392193484590296600979602378676624808129613777993466242203025054573692562689251250471628358318743978285860720148446448885701001277560572526947619392551574490839286458454994488665744991822837769918095117129546414124448777033941223565831420390846864429504774477949153794689948747680362212954278693335653935890352619041936727463717926744868338358149568368643403037768649616778526013610493696186055899318268339432671541328195724261329606699831016666359440874843103020666106568222401047720269951530296879490444224546654729111504346660859907296364097126834834235287147",
"1487259134814709264092032648525971038895865645148901180585340454985524155135260217788758027400478312256339496385275012465661575576202252063145698732079880294664220579764848767704076761853197216563262660046602703973050798218246170835962005598561669706844469447435461092542265792444947706769615695252256130901271870341005768912974433684521436211263358097522726462083917939091760026658925757076733484173202927141441492573799914240222628795405623953109131594523623353044898339481494120112723445689647986475279242446083151413667587008191682564376412347964146113898565886683139407005941383669325997475076910488086663256335689181157957571445067490187939553165903773554290260531009121879044170766615232300936675369451260747671432073394867530820527479172464106442450727640226503746586340279816318821395210726268291535648506190714616083163403189943334431056876038286530365757187367147446004855912033137386225053275419626102417236133948503",
"1095121115716677802856811290392395128588168592409109494900178008967955253005183831872715423151551999734857184538199864469605657805519106717529655044054833197687459782636297255219742994736751541815269727940751860670268774903340296040006114013971309257028332849679096824800250742691718610670812374272414086863715763724622797509437062518082383056050144624962776302147890521249477060215148275163688301275847155316042279405557632639366066847442861422164832655874655824221577849928863023018366835675399949740429332468186340518172487073360822220449055340582568461568645259954873303616953776393853174845132081121976327462740354930744487429617202585015510744298530101547706821590188733515880733527449780963163909830077616357506845523215289297624086914545378511082534229620116563260168494523906566709418166011112754529766183554579321224940951177394088465596712620076240067370589036924024728375076210477267488679008016579588696191194060127319035195370137160936882402244399699172017835144537488486396906144217720028992863941288217185353914991583400421682751000603596655790990815525126154394344641336397793791497068253936771017031980867706707490224041075826337383538651825493679503771934836094655802776331664261631740148281763487765852746577808019633679",
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
/* generic unrestricted moduli */
"17933601194860113372237070562165128350027320072176844226673287945873370751245439587792371960615073855669274087805055507977323024886880985062002853331424203",
"2893527720709661239493896562339544088620375736490408468011883030469939904368086092336458298221245707898933583190713188177399401852627749210994595974791782790253946539043962213027074922559572312141181787434278708783207966459019479487",
"347743159439876626079252796797422223177535447388206607607181663903045907591201940478223621722118173270898487582987137708656414344685816179420855160986340457973820182883508387588163122354089264395604796675278966117567294812714812796820596564876450716066283126720010859041484786529056457896367683122960411136319",
"47266428956356393164697365098120418976400602706072312735924071745438532218237979333351774907308168340693326687317443721193266215155735814510792148768576498491199122744351399489453533553203833318691678263241941706256996197460424029012419012634671862283532342656309677173602509498417976091509154360039893165037637034737020327399910409885798185771003505320583967737293415979917317338985837385734747478364242020380416892056650841470869294527543597349250299539682430605173321029026555546832473048600327036845781970289288898317888427517364945316709081173840186150794397479045034008257793436817683392375274635794835245695887",
"436463808505957768574894870394349739623346440601945961161254440072143298152040105676491048248110146278752857839930515766167441407021501229924721335644557342265864606569000117714935185566842453630868849121480179691838399545644365571106757731317371758557990781880691336695584799313313687287468894148823761785582982549586183756806449017542622267874275103877481475534991201849912222670102069951687572917937634467778042874315463238062009202992087620963771759666448266532858079402669920025224220613419441069718482837399612644978839925207109870840278194042158748845445131729137117098529028886770063736487420613144045836803985635654192482395882603511950547826439092832800532152534003936926017612446606135655146445620623395788978726744728503058670046885876251527122350275750995227",
"11424167473351836398078306042624362277956429440521137061889702611766348760692206243140413411077394583180726863277012016602279290144126785129569474909173584789822341986742719230331946072730319555984484911716797058875905400999504305877245849119687509023232790273637466821052576859232452982061831009770786031785669030271542286603956118755585683996118896215213488875253101894663403069677745948305893849505434201763745232895780711972432011344857521691017896316861403206449421332243658855453435784006517202894181640562433575390821384210960117518650374602256601091379644034244332285065935413233557998331562749140202965844219336298970011513882564935538704289446968322281451907487362046511461221329799897350993370560697505809686438782036235372137015731304779072430260986460269894522159103008260495503005267165927542949439526272736586626709581721032189532726389643625590680105784844246152702670169304203783072275089194754889511973916207",
"1214855636816562637502584060163403830270705000634713483015101384881871978446801224798536155406895823305035467591632531067547890948695117172076954220727075688048751022421198712032848890056357845974246560748347918630050853933697792254955890439720297560693579400297062396904306270145886830719309296352765295712183040773146419022875165382778007040109957609739589875590885701126197906063620133954893216612678838507540777138437797705602453719559017633986486649523611975865005712371194067612263330335590526176087004421363598470302731349138773205901447704682181517904064735636518462452242791676541725292378925568296858010151852326316777511935037531017413910506921922450666933202278489024521263798482237150056835746454842662048692127173834433089016107854491097456725016327709663199738238442164843147132789153725513257167915555162094970853584447993125488607696008169807374736711297007473812256272245489405898470297178738029484459690836250560495461579533254473316340608217876781986188705928270735695752830825527963838355419762516246028680280988020401914551825487349990306976304093109384451438813251211051597392127491464898797406789175453067960072008590614886532333015881171367104445044718144312416815712216611576221546455968770801413440778423979",
NULL
};
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < (unsigned)mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
log = fopen("logs/expt.log", "w");
logb = fopen("logs/expt_dr.log", "w");
logc = fopen("logs/expt_2k.log", "w");
logd = fopen("logs/expt_2kl.log", "w");
for (n = 0; primes[n]; n++) {
SLEEP;
mp_read_radix(&a, primes[n], 10);
mp_zero(&b);
for (rr = 0; rr < (unsigned) mp_count_bits(&a); rr++) {
mp_mul_2(&b, &b);
b.dp[0] |= lbit();
b.used += 1;
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_exptmod(&c, &b, &a, &d));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 10);
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(n < 4 ? logd : (n < 9) ? logc : (n < 16) ? logb : log,
"%d %9llu\n", mp_count_bits(&a), tt);
}
mp_sub_d(&a, 1, &c);
mp_mod(&b, &c, &b);
mp_set(&c, 3);
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_exptmod(&c, &b, &a, &d));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
} while (++rr < 10);
mp_sub_d(&a, 1, &e);
mp_sub(&e, &b, &b);
mp_exptmod(&c, &b, &a, &e); /* c^(p-1-b) mod a */
mp_mulmod(&e, &d, &a, &d); /* c^b * c^(p-1-b) == c^p-1 == 1 */
if (mp_cmp_d(&d, 1)) {
printf("Different (%d)!!!\n", mp_count_bits(&a));
draw(&d);
exit(0);
}
printf("Exponentiating\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf((n < 6) ? logc : (n < 13) ? logb : log, "%d %9llu\n", mp_count_bits(&a), tt);
}
}
fclose(log);
fclose(logb);
fclose(logc);
fclose(logd);
log = fopen("logs/invmod.log", "w");
for (cnt = 4; cnt <= 128; cnt += 4) {
@@ -264,28 +287,29 @@ int main(void)
mp_rand(&b, cnt);
do {
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
mp_add_d(&b, 1, &b);
mp_gcd(&a, &b, &c);
} while (mp_cmp_d(&c, 1) != MP_EQ);
rr = 0;
tt = -1;
rr = 0;
tt = -1;
do {
gg = TIMFUNC();
DO(mp_invmod(&b, &a, &c));
gg = (TIMFUNC() - gg)>>1;
if (tt > gg) tt = gg;
gg = TIMFUNC();
DO(mp_invmod(&b, &a, &c));
gg = (TIMFUNC() - gg) >> 1;
if (tt > gg)
tt = gg;
} while (++rr < 1000);
mp_mulmod(&b, &c, &a, &d);
if (mp_cmp_d(&d, 1) != MP_EQ) {
printf("Failed to invert\n");
return 0;
printf("Failed to invert\n");
return 0;
}
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n", mp_count_bits(&a), CLK_PER_SEC/tt, tt);
fprintf(log, "%d %9llu\n", cnt*DIGIT_BIT, tt);
printf("Inverting mod\t%4d-bit => %9llu/sec, %9llu cycles\n",
mp_count_bits(&a), CLK_PER_SEC / tt, tt);
fprintf(log, "%d %9llu\n", cnt * DIGIT_BIT, tt);
}
fclose(log);
return 0;
}

2
dep.pl
View File

@@ -13,6 +13,8 @@ print CLASS "#if !(defined(LTM1) && defined(LTM2) && defined(LTM3))\n#if defined
foreach my $filename (glob "bn*.c") {
my $define = $filename;
print "Processing $filename\n";
# convert filename to upper case so we can use it as a define
$define =~ tr/[a-z]/[A-Z]/;
$define =~ tr/\./_/;

View File

@@ -18,15 +18,15 @@ is_mersenne (long s, int *pp)
}
if ((res = mp_init (&u)) != MP_OKAY) {
goto __N;
goto LBL_N;
}
/* n = 2^s - 1 */
if ((res = mp_2expt(&n, s)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
if ((res = mp_sub_d (&n, 1, &n)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
/* set u=4 */
@@ -36,22 +36,22 @@ is_mersenne (long s, int *pp)
for (k = 1; k <= s - 2; k++) {
/* u = u^2 - 2 mod n */
if ((res = mp_sqr (&u, &u)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
if ((res = mp_sub_d (&u, 2, &u)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
/* make sure u is positive */
while (u.sign == MP_NEG) {
if ((res = mp_add (&u, &n, &u)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
}
/* reduce */
if ((res = mp_reduce_2k (&u, &n, 1)) != MP_OKAY) {
goto __MU;
goto LBL_MU;
}
}
@@ -62,8 +62,8 @@ is_mersenne (long s, int *pp)
}
res = MP_OKAY;
__MU:mp_clear (&u);
__N:mp_clear (&n);
LBL_MU:mp_clear (&u);
LBL_N:mp_clear (&n);
return res;
}

View File

@@ -189,7 +189,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
}
if ((res = mp_init (&v)) != MP_OKAY) {
goto __C;
goto LBL_C;
}
/* product of first 50 primes */
@@ -197,34 +197,34 @@ pprime (int k, int li, mp_int * p, mp_int * q)
mp_read_radix (&v,
"19078266889580195013601891820992757757219839668357012055907516904309700014933909014729740190",
10)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
if ((res = mp_init (&a)) != MP_OKAY) {
goto __V;
goto LBL_V;
}
/* set the prime */
mp_set (&a, prime_digit ());
if ((res = mp_init (&b)) != MP_OKAY) {
goto __A;
goto LBL_A;
}
if ((res = mp_init (&n)) != MP_OKAY) {
goto __B;
goto LBL_B;
}
if ((res = mp_init (&x)) != MP_OKAY) {
goto __N;
goto LBL_N;
}
if ((res = mp_init (&y)) != MP_OKAY) {
goto __X;
goto LBL_X;
}
if ((res = mp_init (&z)) != MP_OKAY) {
goto __Y;
goto LBL_Y;
}
/* now loop making the single digit */
@@ -236,25 +236,25 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* now compute z = a * b * 2 */
if ((res = mp_mul (&a, &b, &z)) != MP_OKAY) { /* z = a * b */
goto __Z;
goto LBL_Z;
}
if ((res = mp_copy (&z, &c)) != MP_OKAY) { /* c = a * b */
goto __Z;
goto LBL_Z;
}
if ((res = mp_mul_2 (&z, &z)) != MP_OKAY) { /* z = 2 * a * b */
goto __Z;
goto LBL_Z;
}
/* n = z + 1 */
if ((res = mp_add_d (&z, 1, &n)) != MP_OKAY) { /* n = z + 1 */
goto __Z;
goto LBL_Z;
}
/* check (n, v) == 1 */
if ((res = mp_gcd (&n, &v, &y)) != MP_OKAY) { /* y = (n, v) */
goto __Z;
goto LBL_Z;
}
if (mp_cmp_d (&y, 1) != MP_EQ)
@@ -266,7 +266,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* compute x^a mod n */
if ((res = mp_exptmod (&x, &a, &n, &y)) != MP_OKAY) { /* y = x^a mod n */
goto __Z;
goto LBL_Z;
}
/* if y == 1 loop */
@@ -275,7 +275,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* now x^2a mod n */
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2a mod n */
goto __Z;
goto LBL_Z;
}
if (mp_cmp_d (&y, 1) == MP_EQ)
@@ -283,7 +283,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* compute x^b mod n */
if ((res = mp_exptmod (&x, &b, &n, &y)) != MP_OKAY) { /* y = x^b mod n */
goto __Z;
goto LBL_Z;
}
/* if y == 1 loop */
@@ -292,7 +292,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* now x^2b mod n */
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2b mod n */
goto __Z;
goto LBL_Z;
}
if (mp_cmp_d (&y, 1) == MP_EQ)
@@ -300,7 +300,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* compute x^c mod n == x^ab mod n */
if ((res = mp_exptmod (&x, &c, &n, &y)) != MP_OKAY) { /* y = x^ab mod n */
goto __Z;
goto LBL_Z;
}
/* if y == 1 loop */
@@ -309,7 +309,7 @@ pprime (int k, int li, mp_int * p, mp_int * q)
/* now compute (x^c mod n)^2 */
if ((res = mp_sqrmod (&y, &n, &y)) != MP_OKAY) { /* y = x^2ab mod n */
goto __Z;
goto LBL_Z;
}
/* y should be 1 */
@@ -346,14 +346,14 @@ pprime (int k, int li, mp_int * p, mp_int * q)
mp_exch (&n, p);
res = MP_OKAY;
__Z:mp_clear (&z);
__Y:mp_clear (&y);
__X:mp_clear (&x);
__N:mp_clear (&n);
__B:mp_clear (&b);
__A:mp_clear (&a);
__V:mp_clear (&v);
__C:mp_clear (&c);
LBL_Z:mp_clear (&z);
LBL_Y:mp_clear (&y);
LBL_X:mp_clear (&x);
LBL_N:mp_clear (&n);
LBL_B:mp_clear (&b);
LBL_A:mp_clear (&a);
LBL_V:mp_clear (&v);
LBL_C:mp_clear (&c);
return res;
}

View File

@@ -10,13 +10,44 @@
*/
#define TIMES (1UL<<14UL)
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#if defined(__i386__) || defined(__x86_64__)
unsigned long long a;
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
#ifndef X86_TIMER
/* generic ISO C timer */
ulong64 __T;
void t_start(void) { __T = clock(); }
ulong64 t_read(void) { return clock() - __T; }
ulong64 LBL_T;
void t_start(void) { LBL_T = TIMFUNC(); }
ulong64 t_read(void) { return TIMFUNC() - LBL_T; }
#else
extern void t_start(void);

View File

@@ -1,16 +1,16 @@
224 222
448 330
672 436
896 520
1120 612
1344 696
1568 810
1792 912
2016 1006
2240 1116
2464 1152
2688 1284
2912 1348
3136 1486
3360 1580
3584 1636
480 87
960 111
1440 135
1920 159
2400 200
2880 224
3360 248
3840 272
4320 296
4800 320
5280 344
5760 368
6240 392
6720 416
7200 440
7680 464

View File

@@ -0,0 +1,7 @@
513 1489160
769 3688476
1025 8162061
2049 49260015
2561 89579052
3073 148797060
4097 324449263

View File

@@ -0,0 +1,5 @@
607 2272809
1279 9557382
2203 36250309
3217 87666486
4253 174168369

4
logs/expt_2kl.log Normal file
View File

@@ -0,0 +1,4 @@
1024 6954080
2048 35993987
4096 176068521
521 1683720

View File

@@ -0,0 +1,7 @@
532 1989592
784 3898697
1036 6519700
1540 15676650
2072 33128187
3080 82963362
4116 168358337

View File

@@ -1,143 +1,84 @@
140 1272
195 1428
252 1996
307 2586
364 3464
420 4420
476 5260
532 6430
588 7692
644 8704
699 10226
755 11670
812 13190
865 14834
924 16738
979 18362
1036 20660
1092 22776
1148 24848
1204 27168
1260 29930
1316 32258
1370 35172
1422 37534
1482 40390
1537 43990
1589 46946
1652 50438
1703 52902
1764 56646
1820 59892
1876 63248
1932 66872
1988 72596
2042 74662
2100 78512
2156 82944
2211 87444
2268 92170
2324 95534
2380 100484
2435 105024
2491 109460
2546 114154
2603 118946
2660 124110
2716 129300
2771 134274
2828 139594
2883 145234
2939 150332
2996 155750
3048 161718
3108 167492
3162 173882
3219 179766
3276 185560
3330 191826
3388 197822
3442 204176
3500 210682
3556 217236
3612 223484
3666 230714
3724 237744
3779 244080
3835 250970
3890 257914
3947 265162
4001 272128
4060 279108
4116 287606
4171 294716
4227 302806
4284 310260
4340 318564
4395 326164
4443 334034
4508 342108
4561 351810
4618 358828
4675 367332
4732 376140
4787 384172
4841 393308
4899 402036
4955 411286
5010 420290
5067 429688
5124 438810
5180 448130
5235 457264
5290 467390
5348 476586
5404 486120
5459 496512
5516 506624
5569 516346
5628 526604
5684 536544
5740 546936
5796 557284
5852 568106
5907 578824
5963 589204
6019 600176
6076 610564
6127 621972
6188 633564
6244 644730
6300 655288
6354 667402
6412 678824
6467 690594
6522 702718
6580 714148
6636 725608
6690 737834
6747 750100
6804 762202
6860 774184
6916 787298
6971 798734
7028 811162
7083 824570
7139 837738
7196 2579488
7245 2626714
7308 2643582
7364 2698746
7416 2734106
7476 2773372
7530 2816738
7588 2859204
7643 2938596
7698 2919716
7754 2988542
7812 3026520
7867 3058304
7924 3115790
7977 3161450
8035 3203138
8092 3244056
271 555
390 855
508 1161
631 1605
749 2117
871 2687
991 3329
1108 4084
1231 4786
1351 5624
1470 6392
1586 7364
1710 8218
1830 9255
1951 10217
2067 11461
2191 12463
2308 13677
2430 14800
2551 16232
2671 17460
2791 18899
2902 20247
3028 21902
3151 23240
3267 24927
3391 26441
3511 28277
3631 29838
3749 31751
3869 33673
3989 35431
4111 37518
4231 39426
4349 41504
4471 43567
4591 45786
4711 47876
4831 50299
4951 52427
5071 54785
5189 57241
5307 59730
5431 62194
5551 64761
5670 67322
5789 70073
5907 72663
6030 75437
6151 78242
6268 81202
6389 83948
6509 86985
6631 89903
6747 93184
6869 96044
6991 99286
7109 102395
7229 105917
7351 108940
7470 112490
7589 115702
7711 119508
7831 122632
7951 126410
8071 129808
8190 133895
8311 137146
8431 141218
8549 144732
8667 149131
8790 152462
8911 156754
9030 160479
9149 165138
9271 168601
9391 173185
9511 176988
9627 181976
9751 185539
9870 190388
9991 194335
10110 199605
10228 203298

View File

@@ -1,33 +1,84 @@
924 16686
1146 25334
1371 35304
1591 47122
1820 61500
2044 75254
2266 91732
2492 111656
2716 129428
2937 147508
3164 167758
3388 188248
3612 210826
3836 233814
4059 256898
4284 280210
4508 310372
4731 333902
4955 376502
5179 402854
5404 432004
5626 459010
5849 491868
6076 520550
6300 547400
6524 575968
6747 608482
6971 642850
7196 673670
7419 710680
7644 743942
7868 780394
8092 817342
271 560
391 870
511 1159
631 1605
750 2111
871 2737
991 3361
1111 4054
1231 4778
1351 5600
1471 6404
1591 7323
1710 8255
1831 9239
1948 10257
2070 11397
2190 12531
2308 13665
2429 14870
2550 16175
2671 17539
2787 18879
2911 20350
3031 21807
3150 23415
3270 24897
3388 26567
3511 28205
3627 30076
3751 31744
3869 33657
3991 35425
4111 37522
4229 39363
4351 41503
4470 43491
4590 45827
4711 47795
4828 50166
4951 52318
5070 54911
5191 57036
5308 58237
5431 60248
5551 62678
5671 64786
5791 67294
5908 69343
6031 71607
6151 74166
6271 76590
6391 78734
6511 81175
6631 83742
6750 86403
6868 88873
6990 91150
7110 94211
7228 96922
7351 99445
7469 102216
7589 104968
7711 108113
7827 110758
7950 113714
8071 116511
8186 119643
8310 122679
8425 125581
8551 128715
8669 131778
8788 135116
8910 138138
9031 141628
9148 144754
9268 148367
9391 151551
9511 155033
9631 158652
9751 162125
9871 165248
9988 168627
10111 172427
10231 176412

View File

@@ -1,143 +1,84 @@
139 806
195 1212
252 1604
307 2260
364 2892
420 3308
476 4152
532 4814
588 5754
644 6684
700 7226
756 8324
808 9092
866 10068
924 11204
976 12918
1036 13656
1092 15248
1148 15956
1204 17270
1260 19894
1316 20516
1370 21864
1428 25554
1483 26138
1540 27086
1596 29246
1652 32210
1707 32704
1764 35142
1820 39050
1876 39256
1931 41574
1985 45070
2044 46352
2099 48114
2155 51332
2212 53268
2267 55890
2324 59054
2380 60206
2434 63540
2491 66084
2547 68590
2604 74332
2660 74784
2715 77974
2772 79924
2826 82914
2884 87210
2929 89076
2996 92480
3052 96814
3108 99990
3162 102550
3219 105396
3276 109284
3332 113752
3387 116628
3444 120782
3500 122938
3556 127940
3612 303656
3667 312212
3724 324376
3779 329204
3833 340910
3892 353850
3943 362348
4003 367780
4056 380448
4114 393616
4172 404104
4227 415148
4284 409770
4339 436648
4394 442970
4451 463096
4507 472056
4564 485780
4616 496286
4675 507612
4732 519524
4788 536768
4843 542754
4899 553090
4956 571986
5012 586340
5068 599606
5124 613670
5179 624256
5235 636266
5292 655518
5348 668142
5403 677266
5460 696040
5516 712772
5570 723942
5628 739052
5684 755350
5739 769962
5790 775258
5851 790128
5908 814536
5962 827278
6018 844510
6076 851606
6130 865748
6188 894752
6244 900474
6300 928174
6356 928440
6410 957758
6468 981134
6524 994088
6580 1011124
6636 1027178
6692 1045466
6747 1056910
6804 1083784
6860 1104706
6915 1116450
6972 1137894
7028 1154670
7084 1158064
7138 1188734
7196 1214218
7249 1226822
7307 1247528
7363 1255338
7420 1291104
7475 1297940
7532 1324994
7587 1340274
7644 1342596
7698 1381418
7756 1382904
7812 1432588
7867 1443632
7922 1465092
7979 1496804
8036 1520142
8092 1539566
265 562
389 882
509 1207
631 1572
750 1990
859 2433
991 2894
1109 3555
1230 4228
1350 5018
1471 5805
1591 6579
1709 7415
1829 8329
1949 9225
2071 10139
2188 11239
2309 12178
2431 13212
2551 14294
2671 15551
2791 16512
2911 17718
3030 18876
3150 20259
3270 21374
3391 22650
3511 23948
3631 25493
3750 26756
3870 28225
3989 29705
4110 31409
4230 32834
4351 34327
4471 35818
4591 37636
4711 39228
4830 40868
4949 42393
5070 44541
5191 46269
5310 48162
5429 49728
5548 51985
5671 53948
5791 55885
5910 57584
6031 60082
6150 62239
6270 64309
6390 66014
6511 68766
6631 71012
6750 73172
6871 74952
6991 77909
7111 80371
7231 82666
7351 84531
7469 87698
7589 90318
7711 225384
7830 232428
7950 240009
8070 246522
8190 253662
8310 260961
8431 269253
8549 275743
8671 283769
8789 290811
8911 300034
9030 306873
9149 315085
9270 323944
9390 332390
9508 337519
9631 348986
9749 356904
9871 367013
9989 373831
10108 381033
10230 393475

View File

@@ -1,33 +1,84 @@
922 11272
1148 16004
1370 21958
1596 28684
1817 37832
2044 46386
2262 56218
2492 66388
2716 77478
2940 89380
3163 103680
3385 116274
3612 135334
3836 151332
4057 164938
4284 183178
4508 198864
4731 215222
4954 231986
5180 251660
5404 269414
5626 288454
5850 307806
6076 329458
6299 347726
6523 369864
6748 387832
6971 413010
7194 453310
7415 476936
7643 497118
7867 521394
8091 540224
271 560
388 878
511 1179
629 1625
751 1988
871 2423
989 2896
1111 3561
1231 4209
1350 5015
1470 5804
1591 6556
1709 7420
1831 8263
1951 9173
2070 10153
2191 11229
2310 12167
2431 13211
2550 14309
2671 15524
2788 16525
2910 17712
3028 18822
3148 20220
3271 21343
3391 22652
3511 23944
3630 25485
3750 26778
3868 28201
3990 29653
4111 31393
4225 32841
4350 34328
4471 35786
4590 37652
4711 39245
4830 40876
4951 42433
5068 44547
5191 46321
5311 48140
5430 49727
5550 52034
5671 53954
5791 55921
5908 57597
6031 60084
6148 62226
6270 64295
6390 66045
6511 68779
6629 71003
6751 73169
6871 74992
6991 77895
7110 80376
7231 82628
7351 84468
7470 87664
7591 90284
7711 91352
7828 93995
7950 96276
8071 98691
8190 101256
8308 103631
8431 105222
8550 108343
8671 110281
8787 112764
8911 115397
9031 117690
9151 120266
9271 122715
9391 124624
9510 127937
9630 130313
9750 132914
9871 136129
9991 138517
10108 141525
10231 144225

View File

@@ -1,16 +1,16 @@
224 216
448 324
672 428
896 532
1120 648
1344 766
1568 862
1792 928
2016 1070
2240 1128
2464 1250
2688 1344
2912 1436
3136 1542
3360 1628
3584 1696
480 94
960 116
1440 140
1920 164
2400 205
2880 229
3360 253
3840 277
4320 299
4800 321
5280 345
5760 371
6240 395
6720 419
7200 441
7680 465

View File

@@ -27,11 +27,13 @@ bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_reduce_2k_l.obj bn_mp_reduce_is_2k_l.obj bn_mp_reduce_2k_setup_l.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj \
bn_mp_to_signed_bin_n.obj bn_mp_to_unsigned_bin_n.obj
TARGET = libtommath.lib

View File

@@ -32,11 +32,13 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
# make a Windows DLL via Cygwin
windll: $(OBJECTS)

View File

@@ -21,6 +21,10 @@ CFLAGS += -I./
# Default to just generic max opts
CFLAGS += -O3 -xN
#install as this user
USER=root
GROUP=root
default: libtommath.a
#default files to install
@@ -55,11 +59,13 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
libtommath.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtommath.a $(OBJECTS)
@@ -89,10 +95,10 @@ profiled_single:
ranlib libtommath.a
install: libtommath.a
install -d -g root -o root $(DESTDIR)$(LIBPATH)
install -d -g root -o root $(DESTDIR)$(INCPATH)
install -g root -o root $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
test: libtommath.a demo/demo.o
$(CC) demo/demo.o libtommath.a -o test

View File

@@ -26,11 +26,13 @@ bn_mp_prime_is_prime.obj bn_mp_prime_next_prime.obj bn_mp_dr_reduce.obj \
bn_mp_dr_is_modulus.obj bn_mp_dr_setup.obj bn_mp_reduce_setup.obj \
bn_mp_toom_mul.obj bn_mp_toom_sqr.obj bn_mp_div_3.obj bn_s_mp_exptmod.obj \
bn_mp_reduce_2k.obj bn_mp_reduce_is_2k.obj bn_mp_reduce_2k_setup.obj \
bn_mp_reduce_2k_l.obj bn_mp_reduce_is_2k_l.obj bn_mp_reduce_2k_setup_l.obj \
bn_mp_radix_smap.obj bn_mp_read_radix.obj bn_mp_toradix.obj bn_mp_radix_size.obj \
bn_mp_fread.obj bn_mp_fwrite.obj bn_mp_cnt_lsb.obj bn_error.obj \
bn_mp_init_multi.obj bn_mp_clear_multi.obj bn_mp_exteuclid.obj bn_mp_toradix_n.obj \
bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj
bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj \
bn_mp_to_signed_bin_n.obj bn_mp_to_unsigned_bin_n.obj
library: $(OBJECTS)
lib /out:tommath.lib $(OBJECTS)

View File

@@ -1,10 +1,9 @@
#Makefile for GCC
#
#Tom St Denis
VERSION=0:32
VERSION=0:35
CC = libtool --mode=compile gcc
CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare
#for speed
@@ -16,11 +15,15 @@ CFLAGS += -O3 -funroll-loops
#x86 optimizations [should be valid for any GCC install though]
CFLAGS += -fomit-frame-pointer
#install as this user
USER=root
GROUP=root
default: libtommath.la
#default files to install
LIBNAME=libtommath.la
HEADERS=tommath.h
HEADERS=tommath.h tommath_class.h tommath_superclass.h
#LIBPATH-The directory for libtommath to be installed to.
#INCPATH-The directory to install the header files for libtommath.
@@ -50,18 +53,21 @@ bn_mp_prime_is_prime.o bn_mp_prime_next_prime.o bn_mp_dr_reduce.o \
bn_mp_dr_is_modulus.o bn_mp_dr_setup.o bn_mp_reduce_setup.o \
bn_mp_toom_mul.o bn_mp_toom_sqr.o bn_mp_div_3.o bn_s_mp_exptmod.o \
bn_mp_reduce_2k.o bn_mp_reduce_is_2k.o bn_mp_reduce_2k_setup.o \
bn_mp_reduce_2k_l.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_2k_setup_l.o \
bn_mp_radix_smap.o bn_mp_read_radix.o bn_mp_toradix.o bn_mp_radix_size.o \
bn_mp_fread.o bn_mp_fwrite.o bn_mp_cnt_lsb.o bn_error.o \
bn_mp_init_multi.o bn_mp_clear_multi.o bn_mp_exteuclid.o bn_mp_toradix_n.o \
bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_init_set.o \
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o
bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \
bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o
libtommath.la: $(OBJECTS)
libtool --mode=link gcc *.lo -o libtommath.la -rpath $(LIBPATH) -version-info $(VERSION)
libtool --mode=link gcc *.o -o libtommath.a
libtool --mode=install install -c libtommath.la $(LIBPATH)/libtommath.la
install -d -g root -o root $(DESTDIR)$(INCPATH)
install -g root -o root $(HEADERS) $(DESTDIR)$(INCPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
test: libtommath.a demo/demo.o
gcc $(CFLAGS) -c demo/demo.c -o demo/demo.o

View File

@@ -46,7 +46,7 @@ void rand_num(mp_int *a)
int n, size;
unsigned char buf[2048];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 1031;
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 101;
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
while (buf[1] == 0) buf[1] = fgetc(rng);
@@ -58,7 +58,7 @@ void rand_num2(mp_int *a)
int n, size;
unsigned char buf[2048];
size = 10 + ((fgetc(rng)<<8) + fgetc(rng)) % 97;
size = 10 + ((fgetc(rng)<<8) + fgetc(rng)) % 101;
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
while (buf[1] == 0) buf[1] = fgetc(rng);

File diff suppressed because it is too large Load Diff

35
tombc/grammar.txt Normal file
View File

@@ -0,0 +1,35 @@
program := program statement | statement | empty
statement := { statement } |
identifier = numexpression; |
identifier[numexpression] = numexpression; |
function(expressionlist); |
for (identifer = numexpression; numexpression; identifier = numexpression) { statement } |
while (numexpression) { statement } |
if (numexpresion) { statement } elif |
break; |
continue;
elif := else statement | empty
function := abs | countbits | exptmod | jacobi | print | isprime | nextprime | issquare | readinteger | exit
expressionlist := expressionlist, expression | expression
// LR(1) !!!?
expression := string | numexpression
numexpression := cmpexpr && cmpexpr | cmpexpr \|\| cmpexpr | cmpexpr
cmpexpr := boolexpr < boolexpr | boolexpr > boolexpr | boolexpr == boolexpr |
boolexpr <= boolexpr | boolexpr >= boolexpr | boolexpr
boolexpr := shiftexpr & shiftexpr | shiftexpr ^ shiftexpr | shiftexpr \| shiftexpr | shiftexpr
shiftexpr := addsubexpr << addsubexpr | addsubexpr >> addsubexpr | addsubexpr
addsubexpr := mulexpr + mulexpr | mulexpr - mulexpr | mulexpr
mulexpr := expr * expr | expr / expr | expr % expr | expr
expr := -nexpr | nexpr
nexpr := integer | identifier | ( numexpression ) | identifier[numexpression]
identifier := identifer digits | identifier alpha | alpha
alpha := a ... z | A ... Z
integer := hexnumber | digits
hexnumber := 0xhexdigits
hexdigits := hexdigits hexdigit | hexdigit
hexdigit := 0 ... 9 | a ... f | A ... F
digits := digits digit | digit
digit := 0 ... 9

View File

@@ -429,6 +429,15 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d);
/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d);
/* returns true if a can be reduced with mp_reduce_2k_l */
int mp_reduce_is_2k_l(mp_int *a);
/* determines k value for 2k reduction */
int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
/* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
/* d = a**b (mod c) */
int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
@@ -442,7 +451,7 @@ int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
#endif
/* table of first PRIME_SIZE primes */
extern const mp_digit __prime_tab[];
extern const mp_digit ltm_prime_tab[];
/* result=1 if a is divisible by one of the first PRIME_SIZE primes */
int mp_prime_is_divisible(mp_int *a, int *result);
@@ -511,12 +520,14 @@ int mp_count_bits(mp_int *a);
int mp_unsigned_bin_size(mp_int *a);
int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c);
int mp_to_unsigned_bin(mp_int *a, unsigned char *b);
int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
int mp_signed_bin_size(mp_int *a);
int mp_read_signed_bin(mp_int *a, unsigned char *b, int c);
int mp_to_signed_bin(mp_int *a, unsigned char *b);
int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen);
int mp_read_radix(mp_int *a, char *str, int radix);
int mp_read_radix(mp_int *a, const char *str, int radix);
int mp_toradix(mp_int *a, char *str, int radix);
int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen);
int mp_radix_size(mp_int *a, int radix, int *size);
@@ -554,7 +565,7 @@ int fast_mp_invmod(mp_int *a, mp_int *b, mp_int *c);
int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c);
int fast_mp_montgomery_reduce(mp_int *a, mp_int *m, mp_digit mp);
int mp_exptmod_fast(mp_int *G, mp_int *X, mp_int *P, mp_int *Y, int mode);
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y);
int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int mode);
void bn_reverse(unsigned char *s, int len);
extern const char *mp_s_rmap;

View File

@@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{Implementing Multiple Precision Arithmetic \\ ~ \\ Draft Edition }
\title{Multi--Precision Math}
\author{\mbox{
%\begin{small}
\begin{tabular}{c}
@@ -66,7 +66,7 @@ QUALCOMM Australia \\
}
}
\maketitle
This text has been placed in the public domain. This text corresponds to the v0.30 release of the
This text has been placed in the public domain. This text corresponds to the v0.35 release of the
LibTomMath project.
\begin{alltt}
@@ -85,66 +85,32 @@ This text is formatted to the international B5 paper size of 176mm wide by 250mm
\tableofcontents
\listoffigures
\chapter*{Prefaces to the Draft Edition}
I started this text in April 2003 to complement my LibTomMath library. That is, explain how to implement the functions
contained in LibTomMath. The goal is to have a textbook that any Computer Science student can use when implementing their
own multiple precision arithmetic. The plan I wanted to follow was flesh out all the
ideas and concepts I had floating around in my head and then work on it afterwards refining a little bit at a time. Chance
would have it that I ended up with my summer off from Algonquin College and I was given four months solid to work on the
text.
\chapter*{Prefaces}
When I tell people about my LibTom projects and that I release them as public domain they are often puzzled.
They ask why I did it and especially why I continue to work on them for free. The best I can explain it is ``Because I can.''
Which seems odd and perhaps too terse for adult conversation. I often qualify it with ``I am able, I am willing.'' which
perhaps explains it better. I am the first to admit there is not anything that special with what I have done. Perhaps
others can see that too and then we would have a society to be proud of. My LibTom projects are what I am doing to give
back to society in the form of tools and knowledge that can help others in their endeavours.
Choosing to not waste any time I dove right into the project even before my spring semester was finished. I wrote a bit
off and on at first. The moment my exams were finished I jumped into long 12 to 16 hour days. The result after only
a couple of months was a ten chapter, three hundred page draft that I quickly had distributed to anyone who wanted
to read it. I had Jean-Luc Cooke print copies for me and I brought them to Crypto'03 in Santa Barbara. So far I have
managed to grab a certain level of attention having people from around the world ask me for copies of the text was certain
rewarding.
I started writing this book because it was the most logical task to further my goal of open academia. The LibTomMath source
code itself was written to be easy to follow and learn from. There are times, however, where pure C source code does not
explain the algorithms properly. Hence this book. The book literally starts with the foundation of the library and works
itself outwards to the more complicated algorithms. The use of both pseudo--code and verbatim source code provides a duality
of ``theory'' and ``practice'' that the computer science students of the world shall appreciate. I never deviate too far
from relatively straightforward algebra and I hope that this book can be a valuable learning asset.
Now we are past December 2003. By this time I had pictured that I would have at least finished my second draft of the text.
Currently I am far off from this goal. I've done partial re-writes of chapters one, two and three but they are not even
finished yet. I haven't given up on the project, only had some setbacks. First O'Reilly declined to publish the text then
Addison-Wesley and Greg is tried another which I don't know the name of. However, at this point I want to focus my energy
onto finishing the book not securing a contract.
This book and indeed much of the LibTom projects would not exist in their current form if it was not for a plethora
of kind people donating their time, resources and kind words to help support my work. Writing a text of significant
length (along with the source code) is a tiresome and lengthy process. Currently the LibTom project is four years old,
comprises of literally thousands of users and over 100,000 lines of source code, TeX and other material. People like Mads and Greg
were there at the beginning to encourage me to work well. It is amazing how timely validation from others can boost morale to
continue the project. Definitely my parents were there for me by providing room and board during the many months of work in 2003.
So why am I writing this text? It seems like a lot of work right? Most certainly it is a lot of work writing a textbook.
Even the simplest introductory material has to be lined with references and figures. A lot of the text has to be re-written
from point form to prose form to ensure an easier read. Why am I doing all this work for free then? Simple. My philosophy
is quite simply ``Open Source. Open Academia. Open Minds'' which means that to achieve a goal of open minds, that is,
people willing to accept new ideas and explore the unknown you have to make available material they can access freely
without hinderance.
To my many friends whom I have met through the years I thank you for the good times and the words of encouragement. I hope I
honour your kind gestures with this project.
I've been writing free software since I was about sixteen but only recently have I hit upon software that people have come
to depend upon. I started LibTomCrypt in December 2001 and now several major companies use it as integral portions of their
software. Several educational institutions use it as a matter of course and many freelance developers use it as
part of their projects. To further my contributions I started the LibTomMath project in December 2002 aimed at providing
multiple precision arithmetic routines that students could learn from. That is write routines that are not only easy
to understand and follow but provide quite impressive performance considering they are all in standard portable ISO C.
The second leg of my philosophy is ``Open Academia'' which is where this textbook comes in. In the end, when all is
said and done the text will be useable by educational institutions as a reference on multiple precision arithmetic.
At this time I feel I should share a little information about myself. The most common question I was asked at
Crypto'03, perhaps just out of professional courtesy, was which school I either taught at or attended. The unfortunate
truth is that I neither teach at or attend a school of academic reputation. I'm currently at Algonquin College which
is what I'd like to call ``somewhat academic but mostly vocational'' college. In otherwords, job training.
I'm a 21 year old computer science student mostly self-taught in the areas I am aware of (which includes a half-dozen
computer science fields, a few fields of mathematics and some English). I look forward to teaching someday but I am
still far off from that goal.
Now it would be improper for me to not introduce the rest of the texts co-authors. While they are only contributing
corrections and editorial feedback their support has been tremendously helpful in presenting the concepts laid out
in the text so far. Greg has always been there for me. He has tracked my LibTom projects since their inception and even
sent cheques to help pay tuition from time to time. His background has provided a wonderful source to bounce ideas off
of and improve the quality of my writing. Mads is another fellow who has just ``been there''. I don't even recall what
his interest in the LibTom projects is but I'm definitely glad he has been around. His ability to catch logical errors
in my written English have saved me on several occasions to say the least.
What to expect next? Well this is still a rough draft. I've only had the chance to update a few chapters. However, I've
been getting the feeling that people are starting to use my text and I owe them some updated material. My current tenative
plan is to edit one chapter every two weeks starting January 4th. It seems insane but my lower course load at college
should provide ample time. By Crypto'04 I plan to have a 2nd draft of the text polished and ready to hand out to as many
people who will take it.
Open Source. Open Academia. Open Minds.
\begin{flushright} Tom St Denis \end{flushright}
@@ -937,7 +903,7 @@ assumed to contain undefined values they are initially set to zero.
EXAM,bn_mp_grow.c
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @23,if@) checks
A quick optimization is to first determine if a memory re-allocation is required at all. The if statement (line @24,alloc@) checks
if the \textbf{alloc} member of the mp\_int is smaller than the requested digit count. If the count is not larger than \textbf{alloc}
the function skips the re-allocation part thus saving time.
@@ -1310,7 +1276,7 @@ After the function is completed, all of the digits are zeroed, the \textbf{used}
With the mp\_int representation of an integer, calculating the absolute value is trivial. The mp\_abs algorithm will compute
the absolute value of an mp\_int.
\newpage\begin{figure}[here]
\begin{figure}[here]
\begin{center}
\begin{tabular}{l}
\hline Algorithm \textbf{mp\_abs}. \\
@@ -1335,6 +1301,9 @@ logic to handle it.
EXAM,bn_mp_abs.c
This fairly trivial algorithm first eliminates non--required duplications (line @27,a != b@) and then sets the
\textbf{sign} flag to \textbf{MP\_ZPOS}.
\subsection{Integer Negation}
With the mp\_int representation of an integer, calculating the negation is also trivial. The mp\_neg algorithm will compute
the negative of an mp\_int input.
@@ -1368,11 +1337,15 @@ zero as negative.
EXAM,bn_mp_neg.c
Like mp\_abs() this function avoids non--required duplications (line @21,a != b@) and then sets the sign. We
have to make sure that only non--zero values get a \textbf{sign} of \textbf{MP\_NEG}. If the mp\_int is zero
than the \textbf{sign} is hard--coded to \textbf{MP\_ZPOS}.
\section{Small Constants}
\subsection{Setting Small Constants}
Often a mp\_int must be set to a relatively small value such as $1$ or $2$. For these cases the mp\_set algorithm is useful.
\begin{figure}[here]
\newpage\begin{figure}[here]
\begin{center}
\begin{tabular}{l}
\hline Algorithm \textbf{mp\_set}. \\
@@ -1397,11 +1370,14 @@ single digit is set (\textit{modulo $\beta$}) and the \textbf{used} count is adj
EXAM,bn_mp_set.c
Line @21,mp_zero@ calls mp\_zero() to clear the mp\_int and reset the sign. Line @22,MP_MASK@ copies the digit
into the least significant location. Note the usage of a new constant \textbf{MP\_MASK}. This constant is used to quickly
reduce an integer modulo $\beta$. Since $\beta$ is of the form $2^k$ for any suitable $k$ it suffices to perform a binary AND with
$MP\_MASK = 2^k - 1$ to perform the reduction. Finally line @23,a->used@ will set the \textbf{used} member with respect to the
digit actually set. This function will always make the integer positive.
First we zero (line @21,mp_zero@) the mp\_int to make sure that the other members are initialized for a
small positive constant. mp\_zero() ensures that the \textbf{sign} is positive and the \textbf{used} count
is zero. Next we set the digit and reduce it modulo $\beta$ (line @22,MP_MASK@). After this step we have to
check if the resulting digit is zero or not. If it is not then we set the \textbf{used} count to one, otherwise
to zero.
We can quickly reduce modulo $\beta$ since it is of the form $2^k$ and a quick binary AND operation with
$2^k - 1$ will perform the same operation.
One important limitation of this function is that it will only set one digit. The size of a digit is not fixed, meaning source that uses
this function should take that into account. Only trivially small constants can be set using this function.
@@ -1503,10 +1479,12 @@ the zero'th digit. If after all of the digits have been compared, no difference
EXAM,bn_mp_cmp_mag.c
The two if statements on lines @24,if@ and @28,if@ compare the number of digits in the two inputs. These two are performed before all of the digits
are compared since it is a very cheap test to perform and can potentially save considerable time. The implementation given is also not valid
without those two statements. $b.alloc$ may be smaller than $a.used$, meaning that undefined values will be read from $b$ past the end of the
array of digits.
The two if statements (lines @24,if@ and @28,if@) compare the number of digits in the two inputs. These two are
performed before all of the digits are compared since it is a very cheap test to perform and can potentially save
considerable time. The implementation given is also not valid without those two statements. $b.alloc$ may be
smaller than $a.used$, meaning that undefined values will be read from $b$ past the end of the array of digits.
\subsection{Signed Comparisons}
Comparing with sign considerations is also fairly critical in several routines (\textit{division for example}). Based on an unsigned magnitude
@@ -1539,9 +1517,9 @@ $\vert a \vert < \vert b \vert$. Step number four will compare the two when the
EXAM,bn_mp_cmp.c
The two if statements on lines @22,if@ and @26,if@ perform the initial sign comparison. If the signs are not the equal then which ever
has the positive sign is larger. At line @30,if@, the inputs are compared based on magnitudes. If the signs were both negative then
the unsigned comparison is performed in the opposite direction (\textit{line @31,mp_cmp_mag@}). Otherwise, the signs are assumed to
The two if statements (lines @22,if@ and @26,if@) perform the initial sign comparison. If the signs are not the equal then which ever
has the positive sign is larger. The inputs are compared (line @30,if@) based on magnitudes. If the signs were both
negative then the unsigned comparison is performed in the opposite direction (line @31,mp_cmp_mag@). Otherwise, the signs are assumed to
be both positive and a forward direction unsigned comparison is performed.
\section*{Exercises}
@@ -1664,19 +1642,21 @@ The final carry is stored in $c_{max}$ and digits above $max$ upto $oldused$ are
EXAM,bn_s_mp_add.c
Lines @27,if@ to @35,}@ perform the initial sorting of the inputs and determine the $min$ and $max$ variables. Note that $x$ is a pointer to a
mp\_int assigned to the largest input, in effect it is a local alias. Lines @37,init@ to @42,}@ ensure that the destination is grown to
accomodate the result of the addition.
We first sort (lines @27,if@ to @35,}@) the inputs based on magnitude and determine the $min$ and $max$ variables.
Note that $x$ is a pointer to an mp\_int assigned to the largest input, in effect it is a local alias. Next we
grow the destination (@37,init@ to @42,}@) ensure that it can accomodate the result of the addition.
Similar to the implementation of mp\_copy this function uses the braced code and local aliases coding style. The three aliases that are on
lines @56,tmpa@, @59,tmpb@ and @62,tmpc@ represent the two inputs and destination variables respectively. These aliases are used to ensure the
compiler does not have to dereference $a$, $b$ or $c$ (respectively) to access the digits of the respective mp\_int.
The initial carry $u$ is cleared on line @65,u = 0@, note that $u$ is of type mp\_digit which ensures type compatibility within the
implementation. The initial addition loop begins on line @66,for@ and ends on line @75,}@. Similarly the conditional addition loop
begins on line @81,for@ and ends on line @90,}@. The addition is finished with the final carry being stored in $tmpc$ on line @94,tmpc++@.
Note the ``++'' operator on the same line. After line @94,tmpc++@ $tmpc$ will point to the $c.used$'th digit of the mp\_int $c$. This is useful
for the next loop on lines @97,for@ to @99,}@ which set any old upper digits to zero.
The initial carry $u$ will be cleared (line @65,u = 0@), note that $u$ is of type mp\_digit which ensures type
compatibility within the implementation. The initial addition (line @66,for@ to @75,}@) adds digits from
both inputs until the smallest input runs out of digits. Similarly the conditional addition loop
(line @81,for@ to @90,}@) adds the remaining digits from the larger of the two inputs. The addition is finished
with the final carry being stored in $tmpc$ (line @94,tmpc++@). Note the ``++'' operator within the same expression.
After line @94,tmpc++@, $tmpc$ will point to the $c.used$'th digit of the mp\_int $c$. This is useful
for the next loop (line @97,for@ to @99,}@) which set any old upper digits to zero.
\subsection{Low Level Subtraction}
The low level unsigned subtraction algorithm is very similar to the low level unsigned addition algorithm. The principle difference is that the
@@ -1692,7 +1672,7 @@ this algorithm we will assume that the variable $\gamma$ represents the number o
mp\_digit (\textit{this implies $2^{\gamma} > \beta$}).
For example, the default for LibTomMath is to use a ``unsigned long'' for the mp\_digit ``type'' while $\beta = 2^{28}$. In ISO C an ``unsigned long''
data type must be able to represent $0 \le x < 2^{32}$ meaning that in this case $\gamma = 32$.
data type must be able to represent $0 \le x < 2^{32}$ meaning that in this case $\gamma \ge 32$.
\newpage\begin{figure}[!here]
\begin{center}
@@ -1759,20 +1739,23 @@ If $b$ has a smaller magnitude than $a$ then step 9 will force the carry and cop
EXAM,bn_s_mp_sub.c
Line @24,min@ and @25,max@ perform the initial hardcoded sorting of the inputs. In reality the $min$ and $max$ variables are only aliases and are only
used to make the source code easier to read. Again the pointer alias optimization is used within this algorithm. Lines @42,tmpa@, @43,tmpb@ and @44,tmpc@ initialize the aliases for
$a$, $b$ and $c$ respectively.
Like low level addition we ``sort'' the inputs. Except in this case the sorting is hardcoded
(lines @24,min@ and @25,max@). In reality the $min$ and $max$ variables are only aliases and are only
used to make the source code easier to read. Again the pointer alias optimization is used
within this algorithm. The aliases $tmpa$, $tmpb$ and $tmpc$ are initialized
(lines @42,tmpa@, @43,tmpb@ and @44,tmpc@) for $a$, $b$ and $c$ respectively.
The first subtraction loop occurs on lines @47,u = 0@ through @61,}@. The theory behind the subtraction loop is exactly the same as that for
the addition loop. As remarked earlier there is an implementation reason for using the ``awkward'' method of extracting the carry
(\textit{see line @57, >>@}). The traditional method for extracting the carry would be to shift by $lg(\beta)$ positions and logically AND
the least significant bit. The AND operation is required because all of the bits above the $\lg(\beta)$'th bit will be set to one after a carry
occurs from subtraction. This carry extraction requires two relatively cheap operations to extract the carry. The other method is to simply
shift the most significant bit to the least significant bit thus extracting the carry with a single cheap operation. This optimization only works on
twos compliment machines which is a safe assumption to make.
The first subtraction loop (lines @47,u = 0@ through @61,}@) subtract digits from both inputs until the smaller of
the two inputs has been exhausted. As remarked earlier there is an implementation reason for using the ``awkward''
method of extracting the carry (line @57, >>@). The traditional method for extracting the carry would be to shift
by $lg(\beta)$ positions and logically AND the least significant bit. The AND operation is required because all of
the bits above the $\lg(\beta)$'th bit will be set to one after a carry occurs from subtraction. This carry
extraction requires two relatively cheap operations to extract the carry. The other method is to simply shift the
most significant bit to the least significant bit thus extracting the carry with a single cheap operation. This
optimization only works on twos compliment machines which is a safe assumption to make.
If $a$ has a larger magnitude than $b$ an additional loop (\textit{see lines @64,for@ through @73,}@}) is required to propagate the carry through
$a$ and copy the result to $c$.
If $a$ has a larger magnitude than $b$ an additional loop (lines @64,for@ through @73,}@) is required to propagate
the carry through $a$ and copy the result to $c$.
\subsection{High Level Addition}
Now that both lower level addition and subtraction algorithms have been established an effective high level signed addition algorithm can be
@@ -2098,10 +2081,11 @@ FIGU,sliding_window,Sliding Window Movement
EXAM,bn_mp_lshd.c
The if statement on line @24,if@ ensures that the $b$ variable is greater than zero. The \textbf{used} count is incremented by $b$ before
the copy loop begins. This elminates the need for an additional variable in the for loop. The variable $top$ on line @42,top@ is an alias
for the leading digit while $bottom$ on line @45,bottom@ is an alias for the trailing edge. The aliases form a window of exactly $b$ digits
over the input.
The if statement (line @24,if@) ensures that the $b$ variable is greater than zero since we do not interpret negative
shift counts properly. The \textbf{used} count is incremented by $b$ before the copy loop begins. This elminates
the need for an additional variable in the for loop. The variable $top$ (line @42,top@) is an alias
for the leading digit while $bottom$ (line @45,bottom@) is an alias for the trailing edge. The aliases form a
window of exactly $b$ digits over the input.
\subsection{Division by $x$}
@@ -2151,9 +2135,9 @@ Once the window copy is complete the upper digits must be zeroed and the \textbf
EXAM,bn_mp_rshd.c
The only noteworthy element of this routine is the lack of a return type.
-- Will update later to give it a return type...Tom
The only noteworthy element of this routine is the lack of a return type since it cannot fail. Like mp\_lshd() we
form a sliding window except we copy in the other direction. After the window (line @59,for (;@) we then zero
the upper digits of the input to make sure the result is correct.
\section{Powers of Two}
@@ -2214,7 +2198,15 @@ complete. It is possible to optimize this algorithm down to a $O(n)$ algorithm
EXAM,bn_mp_mul_2d.c
Notes to be revised when code is updated. -- Tom
The shifting is performed in--place which means the first step (line @24,a != c@) is to copy the input to the
destination. We avoid calling mp\_copy() by making sure the mp\_ints are different. The destination then
has to be grown (line @31,grow@) to accomodate the result.
If the shift count $b$ is larger than $lg(\beta)$ then a call to mp\_lshd() is used to handle all of the multiples
of $lg(\beta)$. Leaving only a remaining shift of $lg(\beta) - 1$ or fewer bits left. Inside the actual shift
loop (lines @45,if@ to @76,}@) we make use of pre--computed values $shift$ and $mask$. These are used to
extract the carry bit(s) to pass into the next iteration of the loop. The $r$ and $rr$ variables form a
chain between consecutive iterations to propagate the carry.
\subsection{Division by Power of Two}
@@ -2263,7 +2255,8 @@ ignored by passing \textbf{NULL} as the pointer to the mp\_int variable. The
result of the remainder operation until the end. This allows $d$ and $a$ to represent the same mp\_int without modifying $a$ before
the quotient is obtained.
The remainder of the source code is essentially the same as the source code for mp\_mul\_2d. (-- Fix this paragraph up later, Tom).
The remainder of the source code is essentially the same as the source code for mp\_mul\_2d. The only significant difference is
the direction of the shifts.
\subsection{Remainder of Division by Power of Two}
@@ -2306,7 +2299,13 @@ is copied to $b$, leading digits are removed and the remaining leading digit is
EXAM,bn_mp_mod_2d.c
-- Add comments later, Tom.
We first avoid cases of $b \le 0$ by simply mp\_zero()'ing the destination in such cases. Next if $2^b$ is larger
than the input we just mp\_copy() the input and return right away. After this point we know we must actually
perform some work to produce the remainder.
Recalling that reducing modulo $2^k$ and a binary ``and'' with $2^k - 1$ are numerically equivalent we can quickly reduce
the number. First we zero any digits above the last digit in $2^b$ (line @41,for@). Next we reduce the
leading digit of both (line @45,&=@) and then mp\_clamp().
\section*{Exercises}
\begin{tabular}{cl}
@@ -2464,33 +2463,46 @@ exceed the precision requested.
EXAM,bn_s_mp_mul_digs.c
Lines @31,if@ to @35,}@ determine if the Comba method can be used first. The conditions for using the Comba routine are that min$(a.used, b.used) < \delta$ and
the number of digits of output is less than \textbf{MP\_WARRAY}. This new constant is used to control
the stack usage in the Comba routines. By default it is set to $\delta$ but can be reduced when memory is at a premium.
First we determine (line @30,if@) if the Comba method can be used first since it's faster. The conditions for
sing the Comba routine are that min$(a.used, b.used) < \delta$ and the number of digits of output is less than
\textbf{MP\_WARRAY}. This new constant is used to control the stack usage in the Comba routines. By default it is
set to $\delta$ but can be reduced when memory is at a premium.
Of particular importance is the calculation of the $ix+iy$'th column on lines @64,mp_word@, @65,mp_word@ and @66,mp_word@. Note how all of the
variables are cast to the type \textbf{mp\_word}, which is also the type of variable $\hat r$. That is to ensure that double precision operations
are used instead of single precision. The multiplication on line @65,) * (@ makes use of a specific GCC optimizer behaviour. On the outset it looks like
the compiler will have to use a double precision multiplication to produce the result required. Such an operation would be horribly slow on most
processors and drag this to a crawl. However, GCC is smart enough to realize that double wide output single precision multipliers can be used. For
example, the instruction ``MUL'' on the x86 processor can multiply two 32-bit values and produce a 64-bit result.
If we cannot use the Comba method we proceed to setup the baseline routine. We allocate the the destination mp\_int
$t$ (line @36,init@) to the exact size of the output to avoid further re--allocations. At this point we now
begin the $O(n^2)$ loop.
This implementation of multiplication has the caveat that it can be trimmed to only produce a variable number of
digits as output. In each iteration of the outer loop the $pb$ variable is set (line @48,MIN@) to the maximum
number of inner loop iterations.
Inside the inner loop we calculate $\hat r$ as the mp\_word product of the two mp\_digits and the addition of the
carry from the previous iteration. A particularly important observation is that most modern optimizing
C compilers (GCC for instance) can recognize that a $N \times N \rightarrow 2N$ multiplication is all that
is required for the product. In x86 terms for example, this means using the MUL instruction.
Each digit of the product is stored in turn (line @68,tmpt@) and the carry propagated (line @71,>>@) to the
next iteration.
\subsection{Faster Multiplication by the ``Comba'' Method}
MARK,COMBA
One of the huge drawbacks of the ``baseline'' algorithms is that at the $O(n^2)$ level the carry must be computed and propagated upwards. This
makes the nested loop very sequential and hard to unroll and implement in parallel. The ``Comba'' \cite{COMBA} method is named after little known
(\textit{in cryptographic venues}) Paul G. Comba who described a method of implementing fast multipliers that do not require nested
carry fixup operations. As an interesting aside it seems that Paul Barrett describes a similar technique in
his 1986 paper \cite{BARRETT} written five years before.
One of the huge drawbacks of the ``baseline'' algorithms is that at the $O(n^2)$ level the carry must be
computed and propagated upwards. This makes the nested loop very sequential and hard to unroll and implement
in parallel. The ``Comba'' \cite{COMBA} method is named after little known (\textit{in cryptographic venues}) Paul G.
Comba who described a method of implementing fast multipliers that do not require nested carry fixup operations. As an
interesting aside it seems that Paul Barrett describes a similar technique in his 1986 paper \cite{BARRETT} written
five years before.
At the heart of the Comba technique is once again the long-hand algorithm. Except in this case a slight twist is placed on how
the columns of the result are produced. In the standard long-hand algorithm rows of products are produced then added together to form the
final result. In the baseline algorithm the columns are added together after each iteration to get the result instantaneously.
At the heart of the Comba technique is once again the long-hand algorithm. Except in this case a slight
twist is placed on how the columns of the result are produced. In the standard long-hand algorithm rows of products
are produced then added together to form the final result. In the baseline algorithm the columns are added together
after each iteration to get the result instantaneously.
In the Comba algorithm the columns of the result are produced entirely independently of each other. That is at the $O(n^2)$ level a
simple multiplication and addition step is performed. The carries of the columns are propagated after the nested loop to reduce the amount
of work requiored. Succintly the first step of the algorithm is to compute the product vector $\vec x$ as follows.
In the Comba algorithm the columns of the result are produced entirely independently of each other. That is at
the $O(n^2)$ level a simple multiplication and addition step is performed. The carries of the columns are propagated
after the nested loop to reduce the amount of work requiored. Succintly the first step of the algorithm is to compute
the product vector $\vec x$ as follows.
\begin{equation}
\vec x_n = \sum_{i+j = n} a_ib_j, \forall n \in \lbrace 0, 1, 2, \ldots, i + j \rbrace
@@ -2584,38 +2596,32 @@ $256$ digits would allow for numbers in the range of $0 \le x < 2^{7168}$ which,
\textbf{Input}. mp\_int $a$, mp\_int $b$ and an integer $digs$ \\
\textbf{Output}. $c \leftarrow \vert a \vert \cdot \vert b \vert \mbox{ (mod }\beta^{digs}\mbox{)}$. \\
\hline \\
Place an array of \textbf{MP\_WARRAY} double precision digits named $\hat W$ on the stack. \\
Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the stack. \\
1. If $c.alloc < digs$ then grow $c$ to $digs$ digits. (\textit{mp\_grow}) \\
2. If step 1 failed return(\textit{MP\_MEM}).\\
\\
Zero the temporary array $\hat W$. \\
3. for $n$ from $0$ to $digs - 1$ do \\
\hspace{3mm}3.1 $\hat W_n \leftarrow 0$ \\
3. $pa \leftarrow \mbox{MIN}(digs, a.used + b.used)$ \\
\\
Compute the columns. \\
4. for $ix$ from $0$ to $a.used - 1$ do \\
\hspace{3mm}4.1 $pb \leftarrow \mbox{min}(b.used, digs - ix)$ \\
\hspace{3mm}4.2 If $pb < 1$ then goto step 5. \\
\hspace{3mm}4.3 for $iy$ from $0$ to $pb - 1$ do \\
\hspace{6mm}4.3.1 $\hat W_{ix+iy} \leftarrow \hat W_{ix+iy} + a_{ix}b_{iy}$ \\
4. $\_ \hat W \leftarrow 0$ \\
5. for $ix$ from 0 to $pa - 1$ do \\
\hspace{3mm}5.1 $ty \leftarrow \mbox{MIN}(b.used - 1, ix)$ \\
\hspace{3mm}5.2 $tx \leftarrow ix - ty$ \\
\hspace{3mm}5.3 $iy \leftarrow \mbox{MIN}(a.used - tx, ty + 1)$ \\
\hspace{3mm}5.4 for $iz$ from 0 to $iy - 1$ do \\
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\
Propagate the carries upwards. \\
5. $oldused \leftarrow c.used$ \\
6. $c.used \leftarrow digs$ \\
7. If $digs > 1$ then do \\
\hspace{3mm}7.1. for $ix$ from $1$ to $digs - 1$ do \\
\hspace{6mm}7.1.1 $\hat W_{ix} \leftarrow \hat W_{ix} + \lfloor \hat W_{ix-1} / \beta \rfloor$ \\
\hspace{6mm}7.1.2 $c_{ix - 1} \leftarrow \hat W_{ix - 1} \mbox{ (mod }\beta\mbox{)}$ \\
8. else do \\
\hspace{3mm}8.1 $ix \leftarrow 0$ \\
9. $c_{ix} \leftarrow \hat W_{ix} \mbox{ (mod }\beta\mbox{)}$ \\
7. $oldused \leftarrow c.used$ \\
8. $c.used \leftarrow digs$ \\
9. for $ix$ from $0$ to $pa$ do \\
\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
\\
Zero excess digits. \\
10. If $digs < oldused$ then do \\
\hspace{3mm}10.1 for $n$ from $digs$ to $oldused - 1$ do \\
\hspace{6mm}10.1.1 $c_n \leftarrow 0$ \\
11. Clamp excessive digits of $c$. (\textit{mp\_clamp}) \\
12. Return(\textit{MP\_OKAY}). \\
11. Clamp $c$. \\
12. Return MP\_OKAY. \\
\hline
\end{tabular}
\end{center}
@@ -2625,15 +2631,24 @@ Zero excess digits. \\
\end{figure}
\textbf{Algorithm fast\_s\_mp\_mul\_digs.}
This algorithm performs the unsigned multiplication of $a$ and $b$ using the Comba method limited to $digs$ digits of precision. The algorithm
essentially peforms the same calculation as algorithm s\_mp\_mul\_digs, just much faster.
This algorithm performs the unsigned multiplication of $a$ and $b$ using the Comba method limited to $digs$ digits of precision.
The array $\hat W$ is meant to be on the stack when the algorithm is used. The size of the array does not change which is ideal. Note also that
unlike algorithm s\_mp\_mul\_digs no temporary mp\_int is required since the result is calculated directly in $\hat W$.
The outer loop of this algorithm is more complicated than that of the baseline multiplier. This is because on the inside of the
loop we want to produce one column per pass. This allows the accumulator $\_ \hat W$ to be placed in CPU registers and
reduce the memory bandwidth to two \textbf{mp\_digit} reads per iteration.
The $O(n^2)$ loop on step four is where the Comba method's advantages begin to show through in comparison to the baseline algorithm. The lack of
a carry variable or propagation in this loop allows the loop to be performed with only single precision multiplication and additions. Now that each
iteration of the inner loop can be performed independent of the others the inner loop can be performed with a high level of parallelism.
The $ty$ variable is set to the minimum count of $ix$ or the number of digits in $b$. That way if $a$ has more digits than
$b$ this will be limited to $b.used - 1$. The $tx$ variable is set to the to the distance past $b.used$ the variable
$ix$ is. This is used for the immediately subsequent statement where we find $iy$.
The variable $iy$ is the minimum digits we can read from either $a$ or $b$ before running out. Computing one column at a time
means we have to scan one integer upwards and the other downwards. $a$ starts at $tx$ and $b$ starts at $ty$. In each
pass we are producing the $ix$'th output column and we note that $tx + ty = ix$. As we move $tx$ upwards we have to
move $ty$ downards so the equality remains valid. The $iy$ variable is the number of iterations until
$tx \ge a.used$ or $ty < 0$ occurs.
After every inner pass we store the lower half of the accumulator into $W_{ix}$ and then propagate the carry of the accumulator
into the next round by dividing $\_ \hat W$ by $\beta$.
To measure the benefits of the Comba method over the baseline method consider the number of operations that are required. If the
cost in terms of time of a multiply and addition is $p$ and the cost of a carry propagation is $q$ then a baseline multiplication would require
@@ -2643,20 +2658,20 @@ and addition operations in the nested loop in parallel.
EXAM,bn_fast_s_mp_mul_digs.c
The memset on line @47,memset@ clears the initial $\hat W$ array to zero in a single step. Like the slower baseline multiplication
implementation a series of aliases (\textit{lines @67, tmpx@, @70, tmpy@ and @75,_W@}) are used to simplify the inner $O(n^2)$ loop.
In this case a new alias $\_\hat W$ has been added which refers to the double precision columns offset by $ix$ in each pass.
As per the pseudo--code we first calculate $pa$ (line @47,MIN@) as the number of digits to output. Next we begin the outer loop
to produce the individual columns of the product. We use the two aliases $tmpx$ and $tmpy$ (lines @61,tmpx@, @62,tmpy@) to point
inside the two multiplicands quickly.
The inner loop on lines @83,for@, @84,mp_word@ and @85,}@ is where the algorithm will spend the majority of the time, which is why it has been
stripped to the bones of any extra baggage\footnote{Hence the pointer aliases.}. On x86 processors the multiplication and additions amount to at the
very least five instructions (\textit{two loads, two additions, one multiply}) while on the ARMv4 processors they amount to only three
(\textit{one load, one store, one multiply-add}). For both of the x86 and ARMv4 processors the GCC compiler performs a good job at unrolling the loop
and scheduling the instructions so there are very few dependency stalls.
The inner loop (lines @70,for@ to @72,}@) of this implementation is where the tradeoff come into play. Originally this comba
implementation was ``row--major'' which means it adds to each of the columns in each pass. After the outer loop it would then fix
the carries. This was very fast except it had an annoying drawback. You had to read a mp\_word and two mp\_digits and write
one mp\_word per iteration. On processors such as the Athlon XP and P4 this did not matter much since the cache bandwidth
is very high and it can keep the ALU fed with data. It did, however, matter on older and embedded cpus where cache is often
slower and also often doesn't exist. This new algorithm only performs two reads per iteration under the assumption that the
compiler has aliased $\_ \hat W$ to a CPU register.
In theory the difference between the baseline and comba algorithms is a mere $O(qn)$ time difference. However, in the $O(n^2)$ nested loop of the
baseline method there are dependency stalls as the algorithm must wait for the multiplier to finish before propagating the carry to the next
digit. As a result fewer of the often multiple execution units\footnote{The AMD Athlon has three execution units and the Intel P4 has four.} can
be simultaneously used.
After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines @75,W[ix]@, @78,>>@) to forward it as
a carry for the next pass. After the outer loop we use the final carry (line @82,W[ix]@) as the last digit of the product.
\subsection{Polynomial Basis Multiplication}
To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms
@@ -2976,13 +2991,26 @@ result $a \cdot b$ is produced.
EXAM,bn_mp_toom_mul.c
-- Comments to be added during editing phase.
The first obvious thing to note is that this algorithm is complicated. The complexity is worth it if you are multiplying very
large numbers. For example, a 10,000 digit multiplication takes approximaly 99,282,205 fewer single precision multiplications with
Toom--Cook than a Comba or baseline approach (this is a savings of more than 99$\%$). For most ``crypto'' sized numbers this
algorithm is not practical as Karatsuba has a much lower cutoff point.
First we split $a$ and $b$ into three roughly equal portions. This has been accomplished (lines @40,mod@ to @69,rshd@) with
combinations of mp\_rshd() and mp\_mod\_2d() function calls. At this point $a = a2 \cdot \beta^2 + a1 \cdot \beta + a0$ and similiarly
for $b$.
Next we compute the five points $w0, w1, w2, w3$ and $w4$. Recall that $w0$ and $w4$ can be computed directly from the portions so
we get those out of the way first (lines @72,mul@ and @77,mul@). Next we compute $w1, w2$ and $w3$ using Horners method.
After this point we solve for the actual values of $w1, w2$ and $w3$ by reducing the $5 \times 5$ system which is relatively
straight forward.
\subsection{Signed Multiplication}
Now that algorithms to handle multiplications of every useful dimensions have been developed, a rather simple finishing touch is required. So far all
of the multiplication algorithms have been unsigned multiplications which leaves only a signed multiplication algorithm to be established.
\newpage\begin{figure}[!here]
\begin{figure}[!here]
\begin{small}
\begin{center}
\begin{tabular}{l}
@@ -3065,7 +3093,7 @@ Column two of row one is a square and column three is the first unique column.
The baseline squaring algorithm is meant to be a catch-all squaring algorithm. It will handle any of the input sizes that the faster routines
will not handle.
\newpage\begin{figure}[!here]
\begin{figure}[!here]
\begin{small}
\begin{center}
\begin{tabular}{l}
@@ -3121,9 +3149,14 @@ results calculated so far. This involves expensive carry propagation which will
EXAM,bn_s_mp_sqr.c
Inside the outer loop (\textit{see line @32,for@}) the square term is calculated on line @35,r =@. Line @42,>>@ extracts the carry from the square
term. Aliases for $a_{ix}$ and $t_{ix+iy}$ are initialized on lines @45,tmpx@ and @48,tmpt@ respectively. The doubling is performed using two
additions (\textit{see line @57,r + r@}) since it is usually faster than shifting,if not at least as fast.
Inside the outer loop (line @32,for@) the square term is calculated on line @35,r =@. The carry (line @42,>>@) has been
extracted from the mp\_word accumulator using a right shift. Aliases for $a_{ix}$ and $t_{ix+iy}$ are initialized
(lines @45,tmpx@ and @48,tmpt@) to simplify the inner loop. The doubling is performed using two
additions (line @57,r + r@) since it is usually faster than shifting, if not at least as fast.
The important observation is that the inner loop does not begin at $iy = 0$ like for multiplication. As such the inner loops
get progressively shorter as the algorithm proceeds. This is what leads to the savings compared to using a multiplication to
square a number.
\subsection{Faster Squaring by the ``Comba'' Method}
A major drawback to the baseline method is the requirement for single precision shifting inside the $O(n^2)$ nested loop. Squaring has an additional
@@ -3135,9 +3168,9 @@ propagation operations from the inner loop. However, the inner product must sti
that $2a + 2b + 2c = 2(a + b + c)$. That is the sum of all of the double products is equal to double the sum of all the products. For example,
$ab + ba + ac + ca = 2ab + 2ac = 2(ab + ac)$.
However, we cannot simply double all of the columns, since the squares appear only once per row. The most practical solution is to have two mp\_word
arrays. One array will hold the squares and the other array will hold the double products. With both arrays the doubling and carry propagation can be
moved to a $O(n)$ work level outside the $O(n^2)$ level.
However, we cannot simply double all of the columns, since the squares appear only once per row. The most practical solution is to have two
mp\_word arrays. One array will hold the squares and the other array will hold the double products. With both arrays the doubling and
carry propagation can be moved to a $O(n)$ work level outside the $O(n^2)$ level. In this case, we have an even simpler solution in mind.
\newpage\begin{figure}[!here]
\begin{small}
@@ -3147,34 +3180,34 @@ moved to a $O(n)$ work level outside the $O(n^2)$ level.
\textbf{Input}. mp\_int $a$ \\
\textbf{Output}. $b \leftarrow a^2$ \\
\hline \\
Place two arrays of \textbf{MP\_WARRAY} mp\_words named $\hat W$ and $\hat {X}$ on the stack. \\
Place an array of \textbf{MP\_WARRAY} mp\_digits named $W$ on the stack. \\
1. If $b.alloc < 2a.used + 1$ then grow $b$ to $2a.used + 1$ digits. (\textit{mp\_grow}). \\
2. If step 1 failed return(\textit{MP\_MEM}). \\
3. for $ix$ from $0$ to $2a.used + 1$ do \\
\hspace{3mm}3.1 $\hat W_{ix} \leftarrow 0$ \\
\hspace{3mm}3.2 $\hat {X}_{ix} \leftarrow 0$ \\
4. for $ix$ from $0$ to $a.used - 1$ do \\
\hspace{3mm}Compute the square.\\
\hspace{3mm}4.1 $\hat {X}_{ix+ix} \leftarrow \left ( a_{ix} \right )^2$ \\
\\
\hspace{3mm}Compute the double products.\\
\hspace{3mm}4.2 for $iy$ from $ix + 1$ to $a.used - 1$ do \\
\hspace{6mm}4.2.1 $\hat W_{ix+iy} \leftarrow \hat W_{ix+iy} + a_{ix}a_{iy}$ \\
5. $oldused \leftarrow b.used$ \\
6. $b.used \leftarrow 2a.used + 1$ \\
3. $pa \leftarrow 2 \cdot a.used$ \\
4. $\hat W1 \leftarrow 0$ \\
5. for $ix$ from $0$ to $pa - 1$ do \\
\hspace{3mm}5.1 $\_ \hat W \leftarrow 0$ \\
\hspace{3mm}5.2 $ty \leftarrow \mbox{MIN}(a.used - 1, ix)$ \\
\hspace{3mm}5.3 $tx \leftarrow ix - ty$ \\
\hspace{3mm}5.4 $iy \leftarrow \mbox{MIN}(a.used - tx, ty + 1)$ \\
\hspace{3mm}5.5 $iy \leftarrow \mbox{MIN}(iy, \lfloor \left (ty - tx + 1 \right )/2 \rfloor)$ \\
\hspace{3mm}5.6 for $iz$ from $0$ to $iz - 1$ do \\
\hspace{6mm}5.6.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx + iz}a_{ty - iz}$ \\
\hspace{3mm}5.7 $\_ \hat W \leftarrow 2 \cdot \_ \hat W + \hat W1$ \\
\hspace{3mm}5.8 if $ix$ is even then \\
\hspace{6mm}5.8.1 $\_ \hat W \leftarrow \_ \hat W + \left ( a_{\lfloor ix/2 \rfloor}\right )^2$ \\
\hspace{3mm}5.9 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\hspace{3mm}5.10 $\hat W1 \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
\\
Double the products and propagate the carries simultaneously. \\
7. $\hat W_0 \leftarrow 2 \hat W_0 + \hat {X}_0$ \\
8. for $ix$ from $1$ to $2a.used$ do \\
\hspace{3mm}8.1 $\hat W_{ix} \leftarrow 2 \hat W_{ix} + \hat {X}_{ix}$ \\
\hspace{3mm}8.2 $\hat W_{ix} \leftarrow \hat W_{ix} + \lfloor \hat W_{ix - 1} / \beta \rfloor$ \\
\hspace{3mm}8.3 $b_{ix-1} \leftarrow W_{ix-1} \mbox{ (mod }\beta\mbox{)}$ \\
9. $b_{2a.used} \leftarrow \hat W_{2a.used} \mbox{ (mod }\beta\mbox{)}$ \\
10. if $2a.used + 1 < oldused$ then do \\
\hspace{3mm}10.1 for $ix$ from $2a.used + 1$ to $oldused$ do \\
\hspace{6mm}10.1.1 $b_{ix} \leftarrow 0$ \\
11. Clamp excess digits from $b$. (\textit{mp\_clamp}) \\
12. Return(\textit{MP\_OKAY}). \\
6. $oldused \leftarrow b.used$ \\
7. $b.used \leftarrow 2 \cdot a.used$ \\
8. for $ix$ from $0$ to $pa - 1$ do \\
\hspace{3mm}8.1 $b_{ix} \leftarrow W_{ix}$ \\
9. for $ix$ from $pa$ to $oldused - 1$ do \\
\hspace{3mm}9.1 $b_{ix} \leftarrow 0$ \\
10. Clamp excess digits from $b$. (\textit{mp\_clamp}) \\
11. Return(\textit{MP\_OKAY}). \\
\hline
\end{tabular}
\end{center}
@@ -3183,24 +3216,24 @@ Double the products and propagate the carries simultaneously. \\
\end{figure}
\textbf{Algorithm fast\_s\_mp\_sqr.}
This algorithm computes the square of an input using the Comba technique. It is designed to be a replacement for algorithm s\_mp\_sqr when
the number of input digits is less than \textbf{MP\_WARRAY} and less than $\delta \over 2$.
This algorithm computes the square of an input using the Comba technique. It is designed to be a replacement for algorithm
s\_mp\_sqr when the number of input digits is less than \textbf{MP\_WARRAY} and less than $\delta \over 2$.
This algorithm is very similar to the Comba multiplier except with a few key differences we shall make note of.
This routine requires two arrays of mp\_words to be placed on the stack. The first array $\hat W$ will hold the double products and the second
array $\hat X$ will hold the squares. Though only at most $MP\_WARRAY \over 2$ words of $\hat X$ are used, it has proven faster on most
processors to simply make it a full size array.
First, we have an accumulator and carry variables $\_ \hat W$ and $\hat W1$ respectively. This is because the inner loop
products are to be doubled. If we had added the previous carry in we would be doubling too much. Next we perform an
addition MIN condition on $iy$ (step 5.5) to prevent overlapping digits. For example, $a_3 \cdot a_5$ is equal
$a_5 \cdot a_3$. Whereas in the multiplication case we would have $5 < a.used$ and $3 \ge 0$ is maintained since we double the sum
of the products just outside the inner loop we have to avoid doing this. This is also a good thing since we perform
fewer multiplications and the routine ends up being faster.
The loop on step 3 will zero the two arrays to prepare them for the squaring step. Step 4.1 computes the squares of the product. Note how
it simply assigns the value into the $\hat X$ array. The nested loop on step 4.2 computes the doubles of the products. This loop
computes the sum of the products for each column. They are not doubled until later.
After the squaring loop, the products stored in $\hat W$ musted be doubled and the carries propagated forwards. It makes sense to do both
operations at the same time. The expression $\hat W_{ix} \leftarrow 2 \hat W_{ix} + \hat {X}_{ix}$ computes the sum of the double product and the
squares in place.
Finally the last difference is the addition of the ``square'' term outside the inner loop (step 5.8). We add in the square
only to even outputs and it is the square of the term at the $\lfloor ix / 2 \rfloor$ position.
EXAM,bn_fast_s_mp_sqr.c
-- Write something deep and insightful later, Tom.
This implementation is essentially a copy of Comba multiplication with the appropriate changes added to make it faster for
the special case of squaring.
\subsection{Polynomial Basis Squaring}
The same algorithm that performs optimal polynomial basis multiplication can be used to perform polynomial basis squaring. The minor exception
@@ -3312,14 +3345,13 @@ By inlining the copy and shift operations the cutoff point for Karatsuba multipl
is exactly at the point where Comba squaring can no longer be used (\textit{128 digits}). On slower processors such as the Intel P4
it is actually below the Comba limit (\textit{at 110 digits}).
This routine uses the same error trap coding style as mp\_karatsuba\_sqr. As the temporary variables are initialized errors are redirected to
the error trap higher up. If the algorithm completes without error the error code is set to \textbf{MP\_OKAY} and mp\_clears are executed normally.
\textit{Last paragraph sucks. re-write! -- Tom}
This routine uses the same error trap coding style as mp\_karatsuba\_sqr. As the temporary variables are initialized errors are
redirected to the error trap higher up. If the algorithm completes without error the error code is set to \textbf{MP\_OKAY} and
mp\_clears are executed normally.
\subsection{Toom-Cook Squaring}
The Toom-Cook squaring algorithm mp\_toom\_sqr is heavily based on the algorithm mp\_toom\_mul with the exception that squarings are used
instead of multiplication to find the five relations.. The reader is encouraged to read the description of the latter algorithm and try to
instead of multiplication to find the five relations. The reader is encouraged to read the description of the latter algorithm and try to
derive their own Toom-Cook squaring algorithm.
\subsection{High Level Squaring}
@@ -3362,12 +3394,9 @@ EXAM,bn_mp_sqr.c
$\left [ 3 \right ] $ & Devise an efficient algorithm for selection of the radix point to handle inputs \\
& that have different number of digits in Karatsuba multiplication. \\
& \\
$\left [ 3 \right ] $ & In ~SQUARE~ the fact that every column of a squaring is made up \\
$\left [ 2 \right ] $ & In ~SQUARE~ the fact that every column of a squaring is made up \\
& of double products and at most one square is stated. Prove this statement. \\
& \\
$\left [ 2 \right ] $ & In the Comba squaring algorithm half of the $\hat X$ variables are not used. \\
& Revise algorithm fast\_s\_mp\_sqr to shrink the $\hat X$ array. \\
& \\
$\left [ 3 \right ] $ & Prove the equation for Karatsuba squaring. \\
& \\
$\left [ 1 \right ] $ & Prove that Karatsuba squaring requires $O \left (n^{lg(3)} \right )$ time. \\
@@ -3375,6 +3404,14 @@ $\left [ 1 \right ] $ & Prove that Karatsuba squaring requires $O \left (n^{lg(3
$\left [ 2 \right ] $ & Determine the minimal ratio between addition and multiplication clock cycles \\
& required for equation $6.7$ to be true. \\
& \\
$\left [ 3 \right ] $ & Implement a threaded version of Comba multiplication (and squaring) where you \\
& compute subsets of the columns in each thread. Determine a cutoff point where \\
& it is effective and add the logic to mp\_mul() and mp\_sqr(). \\
&\\
$\left [ 4 \right ] $ & Same as the previous but also modify the Karatsuba and Toom-Cook. You must \\
& increase the throughput of mp\_exptmod() for random odd moduli in the range \\
& $512 \ldots 4096$ bits significantly ($> 2x$) to complete this challenge. \\
& \\
\end{tabular}
\chapter{Modular Reduction}
@@ -3394,7 +3431,7 @@ other forms of residues.
Modular reductions are normally used to create either finite groups, rings or fields. The most common usage for performance driven modular reductions
is in modular exponentiation algorithms. That is to compute $d = a^b \mbox{ (mod }c\mbox{)}$ as fast as possible. This operation is used in the
RSA and Diffie-Hellman public key algorithms, for example. Modular multiplication and squaring also appears as a fundamental operation in
Elliptic Curve cryptographic algorithms. As will be discussed in the subsequent chapter there exist fast algorithms for computing modular
elliptic curve cryptographic algorithms. As will be discussed in the subsequent chapter there exist fast algorithms for computing modular
exponentiations without having to perform (\textit{in this example}) $b - 1$ multiplications. These algorithms will produce partial results in the
range $0 \le x < c^2$ which can be taken advantage of to create several efficient algorithms. They have also been used to create redundancy check
algorithms known as CRCs, error correction codes such as Reed-Solomon and solve a variety of number theoeretic problems.
@@ -3610,7 +3647,7 @@ safe to do so.
In order to use algorithm mp\_reduce the value of $\mu$ must be calculated in advance. Ideally this value should be computed once and stored for
future use so that the Barrett algorithm can be used without delay.
\begin{figure}[!here]
\newpage\begin{figure}[!here]
\begin{small}
\begin{center}
\begin{tabular}{l}
@@ -5818,6 +5855,8 @@ To explain the Jacobi Symbol we shall first discuss the Legendre function\footno
defined. The Legendre function computes whether or not an integer $a$ is a quadratic residue modulo an odd prime $p$. Numerically it is
equivalent to equation \ref{eqn:legendre}.
\textit{-- Tom, don't be an ass, cite your source here...!}
\begin{equation}
a^{(p-1)/2} \equiv \begin{array}{rl}
-1 & \mbox{if }a\mbox{ is a quadratic non-residue.} \\

View File

@@ -90,8 +90,11 @@
#define BN_MP_READ_UNSIGNED_BIN_C
#define BN_MP_REDUCE_C
#define BN_MP_REDUCE_2K_C
#define BN_MP_REDUCE_2K_L_C
#define BN_MP_REDUCE_2K_SETUP_C
#define BN_MP_REDUCE_2K_SETUP_L_C
#define BN_MP_REDUCE_IS_2K_C
#define BN_MP_REDUCE_IS_2K_L_C
#define BN_MP_REDUCE_SETUP_C
#define BN_MP_RSHD_C
#define BN_MP_SET_C
@@ -105,7 +108,9 @@
#define BN_MP_SUB_D_C
#define BN_MP_SUBMOD_C
#define BN_MP_TO_SIGNED_BIN_C
#define BN_MP_TO_SIGNED_BIN_N_C
#define BN_MP_TO_UNSIGNED_BIN_C
#define BN_MP_TO_UNSIGNED_BIN_N_C
#define BN_MP_TOOM_MUL_C
#define BN_MP_TOOM_SQR_C
#define BN_MP_TORADIX_C
@@ -132,7 +137,7 @@
#define BN_MP_ISEVEN_C
#define BN_MP_INIT_MULTI_C
#define BN_MP_COPY_C
#define BN_MP_ABS_C
#define BN_MP_MOD_C
#define BN_MP_SET_C
#define BN_MP_DIV_2_C
#define BN_MP_ISODD_C
@@ -242,6 +247,7 @@
#define BN_MP_INIT_MULTI_C
#define BN_MP_SET_C
#define BN_MP_COUNT_BITS_C
#define BN_MP_ABS_C
#define BN_MP_MUL_2D_C
#define BN_MP_CMP_C
#define BN_MP_SUB_C
@@ -323,11 +329,12 @@
#define BN_MP_CLEAR_C
#define BN_MP_ABS_C
#define BN_MP_CLEAR_MULTI_C
#define BN_MP_REDUCE_IS_2K_L_C
#define BN_S_MP_EXPTMOD_C
#define BN_MP_DR_IS_MODULUS_C
#define BN_MP_REDUCE_IS_2K_C
#define BN_MP_ISODD_C
#define BN_MP_EXPTMOD_FAST_C
#define BN_S_MP_EXPTMOD_C
#endif
#if defined(BN_MP_EXPTMOD_FAST_C)
@@ -359,6 +366,7 @@
#define BN_MP_DIV_C
#define BN_MP_MUL_C
#define BN_MP_SUB_C
#define BN_MP_NEG_C
#define BN_MP_EXCH_C
#define BN_MP_CLEAR_MULTI_C
#endif
@@ -433,6 +441,7 @@
#if defined(BN_MP_INVMOD_SLOW_C)
#define BN_MP_ISZERO_C
#define BN_MP_INIT_MULTI_C
#define BN_MP_MOD_C
#define BN_MP_COPY_C
#define BN_MP_ISEVEN_C
#define BN_MP_SET_C
@@ -724,6 +733,17 @@
#define BN_MP_CLEAR_C
#endif
#if defined(BN_MP_REDUCE_2K_L_C)
#define BN_MP_INIT_C
#define BN_MP_COUNT_BITS_C
#define BN_MP_DIV_2D_C
#define BN_MP_MUL_C
#define BN_S_MP_ADD_C
#define BN_MP_CMP_MAG_C
#define BN_S_MP_SUB_C
#define BN_MP_CLEAR_C
#endif
#if defined(BN_MP_REDUCE_2K_SETUP_C)
#define BN_MP_INIT_C
#define BN_MP_COUNT_BITS_C
@@ -732,11 +752,22 @@
#define BN_S_MP_SUB_C
#endif
#if defined(BN_MP_REDUCE_2K_SETUP_L_C)
#define BN_MP_INIT_C
#define BN_MP_2EXPT_C
#define BN_MP_COUNT_BITS_C
#define BN_S_MP_SUB_C
#define BN_MP_CLEAR_C
#endif
#if defined(BN_MP_REDUCE_IS_2K_C)
#define BN_MP_REDUCE_2K_C
#define BN_MP_COUNT_BITS_C
#endif
#if defined(BN_MP_REDUCE_IS_2K_L_C)
#endif
#if defined(BN_MP_REDUCE_SETUP_C)
#define BN_MP_2EXPT_C
#define BN_MP_DIV_C
@@ -814,6 +845,11 @@
#define BN_MP_TO_UNSIGNED_BIN_C
#endif
#if defined(BN_MP_TO_SIGNED_BIN_N_C)
#define BN_MP_SIGNED_BIN_SIZE_C
#define BN_MP_TO_SIGNED_BIN_C
#endif
#if defined(BN_MP_TO_UNSIGNED_BIN_C)
#define BN_MP_INIT_COPY_C
#define BN_MP_ISZERO_C
@@ -821,6 +857,11 @@
#define BN_MP_CLEAR_C
#endif
#if defined(BN_MP_TO_UNSIGNED_BIN_N_C)
#define BN_MP_UNSIGNED_BIN_SIZE_C
#define BN_MP_TO_UNSIGNED_BIN_C
#endif
#if defined(BN_MP_TOOM_MUL_C)
#define BN_MP_INIT_MULTI_C
#define BN_MP_MOD_2D_C
@@ -901,10 +942,12 @@
#define BN_MP_INIT_C
#define BN_MP_CLEAR_C
#define BN_MP_REDUCE_SETUP_C
#define BN_MP_REDUCE_C
#define BN_MP_REDUCE_2K_SETUP_L_C
#define BN_MP_REDUCE_2K_L_C
#define BN_MP_MOD_C
#define BN_MP_COPY_C
#define BN_MP_SQR_C
#define BN_MP_REDUCE_C
#define BN_MP_MUL_C
#define BN_MP_SET_C
#define BN_MP_EXCH_C