; bignuma.asm

; based on:
; bbignuma.asm - asm routines for bignumbers
; Wesley Loewer's Big Numbers.        (C) 1994-95, Wesley B. Loewer
; based pointer version

; See BIGLIB.TXT for further documentation.

; general programming notes
; single arg procedures, p(r), r = ebx (or esi when required)
; two arg procedures,    p(r,n), r=edi, n=ebx(or esi when required)
; two arg procedures,    p(n1,n2), n1=ebx(or esi when required), n2=edi
; three arg proc,        p(r,n1,n2), r=edi, n1=esi, n2=ebx
; unless otherwise noted, such as full_mult, mult, full_square, square

; This code started from bignuma.asm as a base.
; This code assumes that bnlength is a multiple of 8.

%include "xfract_a.inc"

; external functions
CEXTERN neg_a_bf

;external variables
CEXTERN bnlength                        ;:dword  4 - int
CEXTERN rlength                         ;:dword  4 - int
CEXTERN bflength                        ;:dword  4 - int
CEXTERN intlength                       ;:dword  4 - int

section .text

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = 0
; clear_bn   PROC USES di, r:bn_t
; r is passed in ebp + 8
;
; Fills bnlength/4 dwords starting at r with zero and returns r in eax.
; Clobbers: eax, ecx, flags.  edi is saved and restored (USES di).
CGLOBAL clear_bn
clear_bn:
%define r       ebp + 8
        push    ebp                     ; manual frame (FRAME macro not used)
        mov     ebp, esp
        push    edi                     ; edi belongs to the caller (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]          ; edi = destination pointer
        sub     eax, eax                ; fill value = 0
        shr     ecx, 2                  ; bytes -> dwords
        rep     stosd                   ; clear r, dword at a time

        mov     eax, dword [r]          ; return r in eax
        pop     edi
        pop     ebp
        ret
;; clear_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = max positive value
; max_bn   PROC USES di, r:bn_t
; r is passed in ebp + 8
;
; Fills bnlength/4 dwords with 0FFFFFFFFh, then rewrites the last byte
; written as 7Fh so the sign bit is off.  Returns r in eax.
; Clobbers: eax, ecx, flags.  edi is saved and restored (USES di).
CGLOBAL max_bn
max_bn:
%define r       ebp + 8
        push    ebp                     ; manual frame (FRAME macro not used)
        mov     ebp, esp
        push    edi                     ; edi belongs to the caller (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     eax, 0FFFFFFFFh         ; all bits set
        shr     ecx, 2                  ; bytes -> dwords
        rep     stosd                   ; max out r, dword at a time

        ; after the stos, edi points to the byte past the end
        mov     byte [edi-1], 7Fh       ; turn off the sign bit

        mov     eax, dword [r]          ; return r in eax
        pop     edi
        pop     ebp
        ret
;; max_bn   ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n
; copy_bn   PROC USES di si, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; Copies bnlength/4 dwords from n to r and returns r in eax.
; Clobbers: eax, ecx, flags.  esi and edi are saved and restored (USES di si).
CGLOBAL copy_bn
copy_bn:
%define r       ebp + 8
%define n       ebp + 12
        push    ebp                     ; manual frame (FRAME macro not used)
        mov     ebp, esp
        push    esi                     ; both string registers belong
        push    edi                     ; to the caller (USES di si)

        mov     ecx, [bnlength]
        mov     edi, dword [r]          ; destination
        mov     esi, dword [n]          ; source
        shr     ecx, 2                  ; bytes -> dwords
        rep     movsd                   ; copy dword at a time

        mov     eax, dword [r]          ; return r in eax
        pop     edi
        pop     esi
        pop     ebp
        ret
;; copy_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; n1 != n2 ?
; RETURNS: if n1 == n2 returns 0
;          if n1 > n2 returns a positive (bytes left to go when mismatch occurred)
;          if n1 < n2 returns a negative (bytes left to go when mismatch occurred)
; cmp_bn   PROC USES di, n1:bn_t, n2:bn_t
; n1 & n2 passed in ebp + 8 ebp + 12
;
; The numbers are scanned from the most significant dword downwards.  When
; a dword differs, the differing byte is located.  If that byte is the very
; top byte it carries the sign and is compared signed; any lower byte is
; compared unsigned.
; Clobbers: eax, ecx, edx, flags.  ebx and edi are saved by FRAME.
CGLOBAL cmp_bn
cmp_bn:
%define n1      ebp + 8
%define n2      ebp + 12
        FRAME ebx, edi                  ; edi is used below, so save it too

        mov     ecx, [bnlength]
        mov     edx, ecx                ; keep bnlength for the first-step test
        mov     edi, dword [n2]         ; load n2 pointer in edi
        mov     ebx, dword [n1]         ; load n1 pointer in ebx

        add     ebx, ecx                ; point to end of bignumbers
        add     edi, ecx                ; where the msb is

        shr     ecx, 2                  ; bytes -> dwords

.top_loop_32:
        sub     ebx, 4                  ; step down to previous dword
        sub     edi, 4
        mov     eax, dword [ebx]        ; load n1
        cmp     eax, dword [edi]        ; compare to n2
        jne     .not_match_32           ; dwords differ
        dec     ecx
        jnz     .top_loop_32
        jmp     .match                  ; ecx is zero

.not_match_32:
        ; now determine which byte of the four did not match
        shl     ecx, 2                  ; convert back to bytes
        mov     ebx, eax
        shr     ebx, 16                 ; upper half of the n1 dword -> bx

        cmp     bh, [edi+3]             ; most significant byte
        jne     .bottom
        dec     ecx                     ; that byte matched

        cmp     bl, [edi+2]
        jne     .bottom
        dec     ecx                     ; that byte matched

        cmp     ah, [edi+1]
        jne     .bottom
        dec     ecx                     ; that byte matched

        ; bh, bl and ah matched, so the mismatch is in al
        cmp     al, [edi]               ; set the flags for below

.bottom:
        ; Flags still come from the byte compare that found the mismatch.
        ; Record both the signed and the unsigned "n1 > n2" outcome now,
        ; before the next cmp destroys them.  (lahf/sahf cannot be used
        ; here: they do not carry OF, which the signed test needs.)
        setg    al                      ; al = 1 if n1 byte > n2 byte, signed
        seta    ah                      ; ah = 1 if n1 byte > n2 byte, unsigned

        ; if ecx == edx the very first (top) byte differed -> signed result,
        ; otherwise a lower byte differed -> unsigned result
        cmp     ecx, edx
        je      .pick_result
        mov     al, ah
.pick_result:
        test    al, al
        jnz     .n1_bigger

.n2_bigger:
        neg     ecx                     ; make it negative
.n1_bigger:                             ; leave it positive
.match:                                 ; leave it zero
        mov     eax, ecx
        UNFRAME ebx, edi
        ret
;; cmp_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r < 0 ?
; returns 1 if negative, 0 if positive or zero
; is_bn_neg   PROC n:bn_t
; n is passed in ebp + 8
;
; Reads the last byte of n (n[bnlength-1]) and returns its top bit.
; Clobbers: eax, flags.  ebx is saved by FRAME.
CGLOBAL is_bn_neg
is_bn_neg:
%define n       ebp + 8
        FRAME ebx

        mov     ebx, dword [n]          ; load n pointer in ebx
        add     ebx, [bnlength]         ; one past the most significant byte
        movzx   eax, byte [ebx-1]       ; byte that holds the sign bit
        shr     eax, 7                  ; sign bit -> bit 0, rest cleared

        UNFRAME ebx
        ret
;; is_bn_neg   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; n != 0 ?
; RETURNS: if n != 0 returns a nonzero value (the number of dwords that
;                    were still left to examine, counting the nonzero one)
;          else returns 0
; is_bn_not_zero   PROC n:bn_t
; n is passed in ebp + 8
;
; Scans n from the least significant dword upwards.
; Clobbers: eax, ecx, flags.  ebx is saved by FRAME.
CGLOBAL is_bn_not_zero
is_bn_not_zero:
%define n       ebp + 8
        FRAME ebx

        mov     ecx, dword [bnlength]
        mov     ebx, dword [n]
        shr     ecx, 2                  ; bytes -> dwords

.top_loop_32:
        cmp     dword [ebx], 0          ; is this dword zero?
        jnz     .bottom                 ; no: ecx is still nonzero here
        add     ebx, 4                  ; advance to next dword
        dec     ecx
        jg      .top_loop_32

.bottom:
        ; if ecx is zero, then n was zero
        mov     eax, ecx
        UNFRAME ebx
        ret
;; is_bn_not_zero   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 + n2
; add_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r, n1, & n2 passed in ebp + 8 ebp + 12 ebp + 16
;
; Multi-precision add, least significant dword first.  Returns r in eax.
; The carry flag is the loop-carried state: lea and dec do not modify CF,
; so it survives from one adc to the next.
; Clobbers: eax, ecx, flags.  ebx, esi, edi are saved by FRAME.
CGLOBAL add_bn
add_bn:
%define r       ebp + 8
%define n1      ebp + 12
%define n2      ebp + 16
        FRAME ebx, esi, edi             ; esi/edi are used below (USES di si)

        mov     ecx, dword [bnlength]
        mov     edi, dword [r]
        mov     esi, dword [n1]
        mov     ebx, dword [n2]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; no carry into the first dword

.top_loop_32:
        mov     eax, dword [esi]        ; n1
        adc     eax, dword [ebx]        ; n1+n2+carry
        mov     dword [edi], eax        ; r = n1+n2
        lea     edi, [edi+4]            ; advance pointers; lea keeps CF
        lea     esi, [esi+4]
        lea     ebx, [ebx+4]
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, esi, edi
        ret
;; add_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r += n
; add_a_bn   PROC USES di, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; In-place multi-precision add.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, edi are saved by FRAME.
CGLOBAL add_a_bn
add_a_bn:
%define r       ebp + 8
%define n       ebp + 12
        FRAME ebx, edi                  ; edi is used below (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     ebx, dword [n]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; no carry into the first dword

.top_loop_32:
        mov     eax, dword [ebx]        ; n
        adc     dword [edi], eax        ; r += n + carry
        lea     edi, [edi+4]            ; lea keeps CF
        lea     ebx, [ebx+4]
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, edi
        ret
;; add_a_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 - n2
; sub_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r, n1, & n2 passed in ebp + 8 ebp + 12 ebp + 16
;
; Multi-precision subtract, least significant dword first.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, esi, edi are saved by FRAME.
CGLOBAL sub_bn
sub_bn:
%define r       ebp + 8
%define n1      ebp + 12
%define n2      ebp + 16
        FRAME ebx, esi, edi             ; esi/edi are used below (USES di si)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     esi, dword [n1]
        mov     ebx, dword [n2]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; no borrow into the first dword

.top_loop_32:
        mov     eax, dword [esi]        ; n1
        sbb     eax, dword [ebx]        ; n1-n2-borrow
        mov     dword [edi], eax        ; r = n1-n2
        lea     edi, [edi+4]            ; lea keeps CF
        lea     esi, [esi+4]
        lea     ebx, [ebx+4]
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, esi, edi
        ret
;; sub_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r -= n
; sub_a_bn   PROC USES di, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; In-place multi-precision subtract.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, edi are saved by FRAME.
CGLOBAL sub_a_bn
sub_a_bn:
%define r       ebp + 8
%define n       ebp + 12
        FRAME ebx, edi                  ; edi is used below (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     ebx, dword [n]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; no borrow into the first dword

.top_loop_32:
        mov     eax, dword [ebx]        ; n
        sbb     dword [edi], eax        ; r -= n + borrow
        lea     edi, [edi+4]            ; lea keeps CF
        lea     ebx, [ebx+4]
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, edi
        ret
;; sub_a_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = -n
; neg_bn   PROC USES di, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; Two's complement negate.  Dwords are negated from the bottom until the
; first nonzero one has been negated (neg sets CF for a nonzero operand);
; every dword above it is simply complemented.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, edi are saved by FRAME.
CGLOBAL neg_bn
neg_bn:
%define r       ebp + 8
%define n       ebp + 12
        FRAME ebx, edi                  ; edi is used below (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     ebx, dword [n]
        shr     ecx, 2                  ; bytes -> dwords

.top_loop_32:
        mov     eax, dword [ebx]
        neg     eax
        mov     dword [edi], eax        ; mov leaves CF from the neg intact
        jc      short .no_more_carry_32 ; notice the "reverse" logic here

        add     edi, 4                  ; source dword was zero: keep negating
        add     ebx, 4
        dec     ecx
        jg      .top_loop_32
        jmp     short .bottom

.no_more_carry_32:
        add     edi, 4                  ; step past the dword just negated
        add     ebx, 4
        dec     ecx
        jle     short .bottom           ; nothing left to complement

.top_loop_no_more_carry_32:
        mov     eax, dword [ebx]
        not     eax
        mov     dword [edi], eax
        add     edi, 4
        add     ebx, 4
        dec     ecx
        jg      .top_loop_no_more_carry_32

.bottom:
        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, edi
        ret
;; neg_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r *= -1
; neg_a_bn   PROC r:bn_t
; r is passed in ebp + 8
;
; In-place two's complement negate; same scheme as neg_bn.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx is saved by FRAME.
CGLOBAL neg_a_bn
neg_a_bn:
%define r       ebp + 8
        FRAME ebx

        mov     ecx, [bnlength]
        mov     ebx, dword [r]
        shr     ecx, 2                  ; bytes -> dwords

.top_loop_32:
        neg     dword [ebx]
        jc      short .no_more_carry_32 ; notice the "reverse" logic here

        add     ebx, 4                  ; dword was zero: keep negating
        dec     ecx
        jg      .top_loop_32
        jmp     short .bottom

.no_more_carry_32:
        add     ebx, 4                  ; step past the dword just negated
        dec     ecx
        jle     short .bottom           ; nothing left to complement

.top_loop_no_more_carry_32:
        not     dword [ebx]
        add     ebx, 4
        dec     ecx
        jg      .top_loop_no_more_carry_32

.bottom:
        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx
        ret
;; neg_a_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = 2*n
; double_bn   PROC USES di, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; Shifts n left one bit into r, least significant dword first; the bit
; shifted out of each dword travels to the next one in CF.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, edi are saved by FRAME.
CGLOBAL double_bn
double_bn:
%define r       ebp + 8
%define n       ebp + 12
        FRAME ebx, edi                  ; edi is used below (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     ebx, dword [n]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; shift a zero into bit 0

.top_loop_32:
        mov     eax, dword [ebx]
        rcl     eax, 1                  ; rotate with carry left
        mov     dword [edi], eax
        lea     edi, [edi+4]            ; lea keeps CF
        lea     ebx, [ebx+4]
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, edi
        ret
;; double_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r *= 2
; double_a_bn   PROC r:bn_t
; r is passed in ebp + 8
;
; In-place one-bit left shift.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx is saved by FRAME.
CGLOBAL double_a_bn
double_a_bn:
%define r       ebp + 8
        FRAME ebx

        mov     ecx, [bnlength]
        mov     ebx, dword [r]
        shr     ecx, 2                  ; bytes -> dwords
        clc                             ; shift a zero into bit 0

.top_loop_32:
        rcl     dword [ebx], 1          ; rotate with carry left
        lea     ebx, [ebx+4]            ; lea keeps CF
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx
        ret
;; double_a_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n/2
; half_bn   PROC USES di, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
;
; Arithmetic right shift by one bit, most significant dword first.  The
; top dword uses sar (keeps the sign); each lower dword uses rcr so it
; receives the bit shifted out of the dword above.  Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx, edi are saved by FRAME.
CGLOBAL half_bn
half_bn:
%define r       ebp + 8
%define n       ebp + 12
        FRAME ebx, edi                  ; edi is used below (USES di)

        mov     ecx, [bnlength]
        mov     edi, dword [r]
        mov     ebx, dword [n]

        add     edi, ecx                ; start with msb
        add     ebx, ecx
        shr     ecx, 2                  ; bytes -> dwords

        sub     edi, 4                  ; top dword
        sub     ebx, 4
        mov     eax, dword [ebx]
        sar     eax, 1                  ; shift arithmetic right, CF = old bit 0
        mov     dword [edi], eax
        dec     ecx                     ; dec keeps CF
        jle     short .bottom           ; only one dword

.top_loop_32:
        lea     edi, [edi-4]            ; next lower dword; lea keeps CF
        lea     ebx, [ebx-4]
        mov     eax, dword [ebx]
        rcr     eax, 1                  ; rotate with carry right
        mov     dword [edi], eax
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

.bottom:
        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, edi
        ret
;; half_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r /= 2
; half_a_bn   PROC r:bn_t
; r is passed in ebp + 8
;
; In-place arithmetic right shift by one bit; same scheme as half_bn.
; The bit shifted out of every dword must reach the next lower dword, so
; nothing between one rcr and the next may disturb CF (lea and dec do not).
; Returns r in eax.
; Clobbers: eax, ecx, flags.  ebx is saved by FRAME.
CGLOBAL half_a_bn
half_a_bn:
%define r       ebp + 8
        FRAME ebx

        mov     ecx, [bnlength]
        mov     ebx, dword [r]

        add     ebx, ecx                ; start with msb
        shr     ecx, 2                  ; bytes -> dwords

        sub     ebx, 4                  ; top dword
        sar     dword [ebx], 1          ; shift arithmetic right, CF = old bit 0
        dec     ecx                     ; dec keeps CF
        jle     short .bottom           ; only one dword

.top_loop_32:
        lea     ebx, [ebx-4]            ; next lower dword; lea keeps CF
        rcr     dword [ebx], 1          ; rotate with carry right
        dec     ecx                     ; dec keeps CF
        jg      .top_loop_32

.bottom:
        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx
        ret
;;half_a_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 * n2
; Note: r will be a double wide result, 2*bnlength
;       n1 and n2 can be the same pointer
; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
;
; unsafe_full_mult_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r, n1, & n2 passed in ebp + 8 ebp + 12 ebp + 16
CGLOBAL unsafe_full_mult_bn
unsafe_full_mult_bn:
%define r       ebp + 8
%define n1      ebp + 12
%define n2      ebp + 16
;LOCAL sign1:byte, sign2:byte, samevar:byte,
;      i:dword, j:dword, steps:dword, doublesteps:dword, carry_steps:dword,
;      n1p: ptr, n2p: ptr
; pushed 12 bytes for saved ebx, esi, & edi, plus 40 for locals
%define sign1           ebp - 16
%define sign2           ebp - 16 + 2
%define samevar         ebp - 20
%define n1p             ebp - 24
%define n2p             ebp - 28
%define i               ebp - 32
%define j               ebp - 36
%define doublesteps     ebp - 40
%define carry_steps     ebp - 44
%define steps           ebp - 48
        FRAME ebx, esi, edi
        sub     esp, 40                 ; save space for locals

; Test to see if n1 and n2 are the same variable.  It would be better to
; use square_bn(), but it could happen.

        mov     word [samevar], 0       ; assume they are not the same
        mov     ebx, dword [n1]
        cmp     ebx, dword [n2]         ; compare offset
        jne     .end_samevar_check      ; not the same
        mov     word [samevar], 1       ; they are the same
.end_samevar_check:

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.
; check for sign bits
; (ebx still holds the n1 pointer loaded for the same-variable test)
        add     ebx, [bnlength]
        mov     al, byte [ebx-1]
        and     al, 80h                 ; check the sign bit
        mov     byte [sign1], al        ; mov keeps ZF from the and
        jz      .already_pos1
        ; invoke neg_a_bn, n1
        mov     ebx, dword [n1]
        push    ebx
        call    neg_a_bn
        add     esp, 4
.already_pos1:

        cmp     word [samevar], 1       ; if it's the same variable
        je      .already_pos2           ; then skip this second check
        mov     ebx, dword [n2]
        add     ebx, [bnlength]
        mov     al, byte [ebx-1]
        and     al, 80h                 ; check the sign bit
        mov     byte [sign2], al        ; mov keeps ZF from the and
        jz      .already_pos2
        ; invoke neg_a_bn, n2
        mov     ebx, dword [n2]
        push    ebx
        call    neg_a_bn
        add     esp, 4
.already_pos2:

; in the following loops, the following pointers are used
;   n1p, n2p = points to the part of n1, n2 being used
;   edi = points to part of doublebignumber r used in outer loop
;   esi = points to part of doublebignumber r used in inner loop
;   ebx = points to part of doublebignumber r for carry flag loop

        ; set variables
        mov     edx, [bnlength]         ; set outer loop counter
        shr     edx, 2                  ; byte = 1/4 dword
        mov     [steps], edx            ; save in steps
        mov     [i], edx
        shl     edx, 1                  ; double steps

        ; clear r
        sub     eax, eax                ; clear eax
        mov     ecx, edx                ; size of doublebignumber in dwords
        mov     edi, dword [r]          ; load r in edi for stos
        rep     stosd                   ; initialize r to 0

        sub     edx, 2                  ; only 2*s-2 steps are really needed
        mov     [doublesteps], edx
        mov     [carry_steps], edx

        ; prepare segments and offsets for loops
        mov     edi, dword [r]
        mov     esi, edi                ; both esi and edi are used here
        mov     eax, dword [n1]         ; load pointers
        mov     dword [n1p], eax

.top_outer_loop_32:
        mov     eax, dword [n2]         ; set n2p pointer
        mov     [n2p], eax
        mov     eax, [steps]            ; set inner loop counter
        mov     [j], eax

.top_inner_loop_32:
        mov     ebx, [n1p]
        mov     eax, dword [ebx]
        mov     ebx, [n2p]
        mul     dword [ebx]             ; edx:eax = n1 dword * n2 dword

        mov     ebx, esi
        add     ebx, 4                  ; increase by size of dword
        add     dword [ebx-4], eax      ; add low dword
        adc     dword [ebx], edx        ; add high dword
        jnc     .no_more_carry_32       ; carry loop not necessary

        mov     ecx, [carry_steps]      ; how many till end of double big number
        jecxz   .no_more_carry_32       ; test all of ecx: loop below counts ecx
        add     ebx, 4                  ; move pointer to next dword

        ; loop until no more carry or until end of double big number
.top_carry_loop_32:
        add     dword [ebx], 1          ; use add, not inc
        jnc     .no_more_carry_32
        add     ebx, 4                  ; increase by size of dword
        loop    .top_carry_loop_32

.no_more_carry_32:
        add     dword [n2p], 4          ; increase by dword size
        add     esi, 4
        sub     dword [carry_steps], 1  ; use one less step
        sub     dword [j], 1
        ja      .top_inner_loop_32

        add     dword [n1p], 4          ; increase by dword size
        add     edi, 4
        mov     esi, edi                ; start with esi=edi

        sub     dword [doublesteps], 1  ; reduce the carry steps needed
        mov     eax, [doublesteps]
        mov     [carry_steps], eax

        sub     dword [i], 1
        ja      .top_outer_loop_32

        ; result is now r, a double wide bignumber

.bottom:
        cmp     word [samevar], 1       ; were the variable the same ones?
        je      .pos_answer             ; if yes, then jump

        mov     al, byte [sign1]        ; is result + or - ?
        cmp     al, byte [sign2]        ; sign(n1) == sign(n2) ?
        je      .pos_answer             ; yes
        shl     dword [bnlength], 1     ; temporarily double bnlength
                                        ; for double wide bignumber
        ; invoke neg_a_bn, r
        mov     ebx, dword [r]
        push    ebx
        call    neg_a_bn
        add     esp, 4
        shr     dword [bnlength], 1     ; restore bnlength
.pos_answer:

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, esi, edi
        ret
;; unsafe_full_mult_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n1 * n2 calculating only the top rlength bytes
; Note: r will be of length rlength
;       2*bnlength >= rlength > bnlength
;       n1 and n2 can be the same pointer
; SIDE-EFFECTS: n1 and n2 are changed to their absolute values
;
; unsafe_mult_bn   PROC USES di si, r:bn_t, n1:bn_t, n2:bn_t
; r, n1, & n2 passed in ebp + 8 ebp + 12 ebp + 16
CGLOBAL unsafe_mult_bn
unsafe_mult_bn:
%define r       ebp + 8
%define n1      ebp + 12
%define n2      ebp + 16
;LOCAL sign1:byte, sign2:byte, samevar:byte,
;      i:dword, j:dword, steps:dword, doublesteps:dword,
;      carry_steps:dword, skips:dword,
;      n1p: ptr, n2p: ptr
; pushed 12 bytes for saved ebx, esi, & edi, plus 40 for locals
%define sign1           ebp - 16
%define sign2           ebp - 16 + 2
%define samevar         ebp - 20
%define n1p             ebp - 24
%define n2p             ebp - 28
%define i               ebp - 32
%define j               ebp - 36
%define doublesteps     ebp - 40
%define carry_steps     ebp - 44
%define steps           ebp - 48
%define skips           ebp - 52
        FRAME ebx, esi, edi
        sub     esp, 40                 ; save space for locals

; Test to see if n1 and n2 are the same variable.  It would be better to
; use square_bn(), but it could happen.

        mov     word [samevar], 0       ; assume they are not the same
        mov     ebx, dword [n1]
        cmp     ebx, dword [n2]         ; compare offset
        jne     .end_samevar_check      ; not the same
        mov     word [samevar], 1       ; they are the same
.end_samevar_check:

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.

        ; check for sign bits
        add     ebx, [bnlength]
        mov     al, byte [ebx-1]
        and     al, 80h                 ; check the sign bit
        mov     byte [sign1], al        ; mov keeps ZF from the and
        jz      .already_pos1
        ; invoke neg_a_bn, n1
        mov     ebx, dword [n1]
        push    ebx
        call    neg_a_bn
        add     esp, 4
.already_pos1:

        cmp     word [samevar], 1       ; if it's the same variable
        je      .already_pos2           ; then skip this second check
        mov     ebx, dword [n2]
        add     ebx, [bnlength]
        mov     al, byte [ebx-1]
        and     al, 80h                 ; check the sign bit
        mov     byte [sign2], al        ; mov keeps ZF from the and
        jz      .already_pos2
        ; invoke neg_a_bn, n2
        mov     ebx, dword [n2]
        push    ebx
        call    neg_a_bn
        add     esp, 4
.already_pos2:

        ; adjust n2 pointer for partial precision
        ; (this rewrites the n2 argument slot on our own stack frame)
        mov     eax, [bnlength]
        shl     eax, 1                  ; 2*bnlength
        sub     eax, [rlength]          ; 2*bnlength-rlength
        add     dword [n2], eax         ; n2 = n2+2*bnlength-rlength

; in the following loops, the following pointers are used
;   n1p, n2p = points to the part of n1, n2 being used
;   edi = points to part of doublebignumber used in outer loop
;   esi = points to part of doublebignumber used in inner loop
;   ebx = points to part of doublebignumber for carry flag loop

        ; clear r
        sub     eax, eax                ; clear eax
        mov     ecx, [rlength]          ; size of r in bytes
        shr     ecx, 2                  ; byte = 1/4 dword
        mov     edi, dword [r]          ; load r in edi for stos
        rep     stosd                   ; initialize r to 0

        ; set variables
        mov     eax, [rlength]          ; set steps for first loop
        sub     eax, [bnlength]
        shr     eax, 2                  ; byte = 1/4 dword
        mov     [steps], eax            ; save in steps

        mov     eax, [bnlength]
        shr     eax, 2                  ; byte = 1/4 dword
        mov     [i], eax

        sub     eax, [steps]
        mov     [skips], eax            ; how long to skip over pointer shifts

        mov     eax, [rlength]          ; set steps for first loop
        shr     eax, 2                  ; byte = 1/4 dword
        sub     eax, 2                  ; only rlength/4-2 steps are really needed
        mov     [doublesteps], eax
        mov     [carry_steps], eax

        ; prepare segments and offsets for loops
        mov     edi, dword [r]
        mov     esi, edi                ; both si and di are used here
        mov     eax, dword [n1]         ; load pointers
        mov     [n1p], eax

.top_outer_loop_32:
        mov     eax, dword [n2]         ; set n2p pointer
        mov     [n2p], eax
        mov     eax, [steps]            ; set inner loop counter
        mov     [j], eax

.top_inner_loop_32:
        mov     ebx, [n1p]
        mov     eax, dword [ebx]
        mov     ebx, [n2p]
        mul     dword [ebx]             ; edx:eax = n1 dword * n2 dword

        mov     ebx, esi
        add     ebx, 4                  ; increase by size of dword
        add     dword [ebx-4], eax      ; add low dword
        adc     dword [ebx], edx        ; add high dword
        jnc     .no_more_carry_32       ; carry loop not necessary

        mov     ecx, [carry_steps]      ; how many till end of double big number
        jecxz   .no_more_carry_32       ; test all of ecx: loop below counts ecx
        add     ebx, 4                  ; move pointer to next dword

        ; loop until no more carry or until end of r
.top_carry_loop_32:
        add     dword [ebx], 1          ; use add, not inc
        jnc     .no_more_carry_32
        add     ebx, 4                  ; increase by size of dword
        loop    .top_carry_loop_32

.no_more_carry_32:
        add     dword [n2p], 4          ; increase by dword size
        add     esi, 4
        sub     dword [carry_steps], 1  ; use one less step
        sub     dword [j], 1
        ja      .top_inner_loop_32

        add     dword [n1p], 4          ; increase by dword size

        cmp     dword [skips], 0
        je      .type2_shifts_32
        sub     dword [n2], 4           ; shift n2 back a dword
        add     dword [steps], 1        ; one more step this time
        ; leave edi and doublesteps where they are
        sub     dword [skips], 1        ; keep track of how many times we've done this
        jmp     .shifts_bottom_32
.type2_shifts_32:
        add     edi, 4                  ; shift edi forward a dword
        sub     dword [doublesteps], 1  ; reduce the carry steps needed
.shifts_bottom_32:
        mov     esi, edi                ; start with esi=edi
        mov     eax, [doublesteps]
        mov     [carry_steps], eax

        sub     dword [i], 1
        ja      .top_outer_loop_32

        ; result is in r

.bottom:                                ; local label: stays inside unsafe_mult_bn
        cmp     word [samevar], 1       ; were the variable the same ones?
        je      .pos_answer             ; if yes, then jump

        mov     al, byte [sign1]        ; is result + or - ?
        cmp     al, byte [sign2]        ; sign(n1) == sign(n2) ?
        je      .pos_answer             ; yes
        push    dword [bnlength]        ; save bnlength
        mov     eax, [rlength]
        mov     [bnlength], eax         ; set bnlength = rlength
        ; invoke neg_a_bn, r
        mov     ebx, dword [r]
        push    ebx
        call    neg_a_bn
        add     esp, 4
        pop     dword [bnlength]        ; restore bnlength
.pos_answer:

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, esi, edi
        ret
;;unsafe_mult_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n^2
;   because of the symmetry involved, n^2 is much faster than n*n
;   for a bignumber of length l
;      n*n takes l^2 multiplications
;      n^2 takes (l^2+l)/2 multiplications
;          which is about 1/2 n*n as l gets large
;  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)
;
; Note: r will be a double wide result, 2*bnlength
; SIDE-EFFECTS: n is changed to its absolute value
;
; unsafe_full_square_bn   PROC USES di si, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
CGLOBAL unsafe_full_square_bn
unsafe_full_square_bn:
%define r       ebp + 8
%define n       ebp + 12
;LOCAL i:dword, j:dword, steps:dword, doublesteps:dword, carry_steps:dword,
;      rp1: ptr, rp2: ptr
; pushed 12 bytes for saved ebx, esi, & edi, plus 40 for locals
%define rp1             ebp - 16
%define rp2             ebp - 20
%define i               ebp - 24
%define j               ebp - 28
%define doublesteps     ebp - 32
%define carry_steps     ebp - 36
%define steps           ebp - 40
        FRAME ebx, esi, edi
        sub     esp, 40                 ; save space for locals

; By forcing the bignumber to be positive and keeping track of the sign
; bits separately, quite a few multiplies are saved.
; check for sign bit
        mov     ebx, dword [n]
        add     ebx, [bnlength]
        mov     al, byte [ebx-1]
        and     al, 80h                 ; check the sign bit
        jz      .already_pos
        ; invoke neg_a_bn, n
        mov     ebx, dword [n]
        push    ebx
        call    neg_a_bn
        add     esp, 4
.already_pos:

; in the following loops, the following pointers are used
;   n1p(edi), n2p(esi) = points to the parts of n being used
;   rp1 = points to part of doublebignumber used in outer loop
;   rp2 = points to part of doublebignumber used in inner loop
;   ebx = points to part of doublebignumber for carry flag loop

        mov     ecx, dword [bnlength]   ; bnlength in bytes

        ; clear r
        sub     eax, eax                ; clear eax
        ; 2{twice the size}*bnlength/4{bytes per dword}
        shr     ecx, 1                  ; size of doublebignumber in dwords
        mov     edi, dword [r]          ; load r pointer in edi for stos
        rep     stosd                   ; initialize r to 0

        ; initialize vars
        mov     edx, [bnlength]         ; set outer loop counter
        shr     edx, 2                  ; byte = 1/4 dword
        sub     edx, 1                  ; don't need to do last one
        mov     [i], edx                ; loop counter
        mov     [steps], edx            ; save in steps
        shl     edx, 1                  ; double steps
        sub     edx, 1                  ; only 2*s-1 steps are really needed
        mov     [doublesteps], edx
        mov     [carry_steps], edx

        ; initialize pointers
        mov     edi, dword [n]          ; load n1p pointer
        mov     eax, dword [r]

        add     eax, 4                  ; start with second dword
        mov     [rp1], eax
        mov     [rp2], eax              ; start with rp2=rp1

        cmp     dword [i], 0            ; if bignumberlength is 4
        je      .skip_middle_terms_32

.top_outer_loop_32:
        mov     esi, edi                ; set n2p pointer
        add     esi, 4                  ; to 1 dword beyond n1p(di)
        mov     eax, [steps]            ; set inner loop counter
        mov     [j], eax

.top_inner_loop_32:
        mov     eax, dword [edi]
        mul     dword [esi]             ; edx:eax = cross term

        mov     ebx, [rp2]
        add     ebx, 4                  ; increase by size of dword
        add     dword [ebx-4], eax      ; add low dword
        adc     dword [ebx], edx        ; add high dword
        jnc     .no_more_carry_32       ; carry loop not necessary

        mov     ecx, [carry_steps]      ; how many till end of double big number
        jecxz   .no_more_carry_32       ; test all of ecx: loop below counts ecx
        add     ebx, 4                  ; move pointer to next dword

        ; loop until no more carry or until end of double big number
.top_carry_loop_32:
        add     dword [ebx], 1          ; use add, not inc
        jnc     .no_more_carry_32
        add     ebx, 4                  ; increase by size of dword
        loop    .top_carry_loop_32

.no_more_carry_32:
        add     esi, 4                  ; increase by dword size
        add     dword [rp2], 4
        sub     dword [carry_steps], 1  ; use one less step
        sub     dword [j], 1
        ja      .top_inner_loop_32

        add     edi, 4                  ; increase by dword size
        add     dword [rp1], 8          ; increase by 2*dword size
        mov     eax, [rp1]
        mov     [rp2], eax              ; start with rp2=rp1

        sub     dword [doublesteps], 2  ; reduce the carry steps needed
        mov     eax, [doublesteps]
        mov     [carry_steps], eax

        sub     dword [steps], 1        ; use one less step
        sub     dword [i], 1
        ja      .top_outer_loop_32

        ; All the middle terms have been multiplied.  Now double it.
        shl     dword [bnlength], 1     ; r is a double wide bignumber
        ; invoke double_a_bn, r
        mov     ebx, dword [r]
        push    ebx
        call    double_a_bn
        add     esp, 4
        shr     dword [bnlength], 1     ; restore bnlength

.skip_middle_terms_32:                  ; ds is not necessarily restored here

; Now go back and add in the squared terms.
; In the following loops, the following pointers are used
;   n1p(edi) = points to the parts of n being used
;   rp1(esi) = points to part of doublebignumber used in outer loop
;   ebx = points to part of doublebignumber for carry flag loop

        mov     edi, dword [n]          ; load n1p pointer in edi

        mov     edx, [bnlength]         ; set outer loop counter
        shr     edx, 2                  ; 1 byte = 1/4 dword
        mov     dword [i], edx          ; loop counter
        shl     edx, 1                  ; double steps

        sub     edx, 2                  ; only 2*s-2 steps are really needed
        mov     [doublesteps], edx
        mov     [carry_steps], edx
        mov     esi, dword [r]          ; set rp1

.top_outer_loop_squares_32:
        mov     eax, dword [edi]
        mul     eax                     ; square it

        mov     ebx, esi
        add     ebx, 4                  ; increase by size of dword
        add     dword [ebx-4], eax      ; add low dword
        adc     dword [ebx], edx        ; add high dword
        jnc     .no_more_carry_squares_32 ; carry loop not necessary

        mov     ecx, dword [carry_steps] ; how many till end of double big number
        jecxz   .no_more_carry_squares_32 ; test all of ecx: loop below counts ecx
        add     ebx, 4                  ; move pointer to next dword

        ; loop until no more carry or until end of double big number
.top_carry_loop_squares_32:
        add     dword [ebx], 1          ; use add, not inc
        jnc     .no_more_carry_squares_32
        add     ebx, 4                  ; increase by size of dword
        loop    .top_carry_loop_squares_32

.no_more_carry_squares_32:
        add     edi, 4                  ; increase by dword size
        add     esi, 8                  ; increase by 2*dword size

        sub     dword [doublesteps], 2  ; reduce the carry steps needed
        mov     eax, [doublesteps]
        mov     [carry_steps], eax

        sub     dword [i], 1
        ja      .top_outer_loop_squares_32

        ; result is in r, a double wide bignumber
        ; since it is a square, the result has to already be positive

        mov     eax, dword [r]          ; return r in eax
        UNFRAME ebx, esi, edi
        ret
;;unsafe_full_square_bn   ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; r = n^2
;   because of the symmetry involved, n^2 is much faster than n*n
;   for a bignumber of length l
;      n*n takes l^2 multiplications
;      n^2 takes (l^2+l)/2 multiplications
;          which is about 1/2 n*n as l gets large
;  uses the fact that (a+b+c+...)^2 = (a^2+b^2+c^2+...)+2(ab+ac+bc+...)
;
; Note: r will be of length rlength
;       2*bnlength >= rlength > bnlength
; SIDE-EFFECTS: n is changed to its absolute value
;
; unsafe_square_bn   PROC USES di si, r:bn_t, n:bn_t
; r & n passed in ebp + 8 ebp + 12
CGLOBAL unsafe_square_bn
unsafe_square_bn:
%define r       ebp + 8
%define n       ebp + 12
;LOCAL i:dword, j:dword, steps:dword, doublesteps:dword, carry_steps:dword,
;      skips:dword, rodd:dword,
;      n3p: ptr,
;      rp1: ptr, rp2: ptr
%define rp1             ebp - 16
%define rp2             ebp - 20
%define n3p             ebp - 24
%define i               ebp - 28
%define j               ebp - 32
%define doublesteps     ebp - 36
%define carry_steps     ebp - 40
%define steps           ebp - 44
%define skips           ebp - 48
%define rodd            ebp - 52
        FRAME ebx, esi, edi
        sub     esp, 40                 ; save space for locals

; This whole procedure would be a great deal simpler if we could assume that
; rlength < 2*bnlength (that is, not =).  Therefore, we will take the
; easy way out and call full_square_bn() if it is.
        mov     eax, [rlength]
        shr     eax, 1                  ; 1/2 * rlength
        cmp     eax, dword [bnlength]   ; 1/2 * rlength == bnlength?
jne .not_full_square ; invoke unsafe_full_square_bn, r, n mov eax, [n] push eax mov eax, [r] push eax call unsafe_full_square_bn add esp, 8 ; eax is still loaded with return value jmp .quit_proc ; we're outa here .not_full_square: ; By forcing the bignumber to be positive and keeping track of the sign ; bits separately, quite a few multiplies are saved. ; check for sign bit mov ebx, dword [n] ; load n1 pointer in ebx add ebx, [bnlength] mov al, byte [ebx-1] and al, 80h ; check the sign bit jz .already_pos ; invoke neg_a_bn, n mov ebx, dword [n] push ebx call neg_a_bn add esp, 4 .already_pos: ; in the following loops, the following pointers are used ; n1p(edi), n2p(esi) = points to the parts of n being used ; rp1 = points to part of doublebignumber used in outer loop ; rp2 = points to part of doublebignumber used in inner loop ; ebx = points to part of doublebignumber for carry flag loop ; clear r sub eax, eax ; clear eax mov ecx, [rlength] ; size of rlength in bytes shr ecx, 2 ; byte = 1/4 dword mov edi, dword [r] ; load r pointer in edi for stos rep stosd ; initialize r to 0 ; initialize vars ; determine whether r is on an odd or even dword in the number ; (even if rlength==2*bnlength, dec r alternates odd/even) mov eax, [bnlength] shl eax, 1 ; double wide width sub eax, [rlength] ; 2*bnlength-rlength shr eax, 2 ; 1 byte = 1/4 dword and eax, 0001h ; check the odd sign bit mov [rodd], eax mov eax, [bnlength] ; set outer loop counter shr eax, 2 ; byte = 1/4 dword sub eax, 1 ; don't need to do last one mov dword [i], eax ; loop counter mov eax, [rlength] ; set steps for first loop sub eax, [bnlength] shr eax, 2 ; byte = 1/4 dword mov dword [steps], eax ; save in steps mov edx, [bnlength] shr edx, 2 ; bnlength/4 add eax, edx ; steps+bnlength/4 sub eax, 2 ; steps+bnlength/4-2 mov [doublesteps], eax mov [carry_steps], eax mov eax, [i] sub eax, [steps] shr eax, 1 ; for both words and dwords mov dword [skips], eax ; how long to skip over pointer shifts ; initialize 
pointers mov edi, dword [n] ; load n1p pointer mov esi, edi mov eax, [bnlength] shr eax, 2 ; 1 byte = 1/4 dword sub eax, [steps] shl eax, 2 ; 1 byte = 1/4 dword add esi, eax ; n2p = n1p + bnlength/4 - steps mov dword [n3p], esi ; save for later use mov eax, dword [r] mov [rp1], eax mov [rp2], eax ; start with rp2=rp1 cmp dword [i], 0 ; if bignumberlength is 8 je .skip_middle_terms_32 .top_outer_loop_32: mov eax, [steps] ; set inner loop counter mov [j], eax .top_inner_loop_32: mov eax, dword [edi] mul dword [esi] mov ebx, [rp2] add ebx, 4 ; increase by size of dword add dword[ebx-4], eax ; add low dword adc dword [ebx], edx ; add high dword jnc .no_more_carry_32 ; carry loop not necessary mov ecx, [carry_steps] ; how many till end of double big number jcxz .no_more_carry_32 add ebx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number .top_carry_loop_32: add dword [ebx], 1 ; use add, not inc jnc .no_more_carry_32 add ebx, 4 ; increase by size of dword loop .top_carry_loop_32 .no_more_carry_32: add esi, 4 ; increase by dword size add dword [rp2], 4 sub dword [carry_steps], 1 ; use one less step sub dword [j], 1 ja .top_inner_loop_32 add edi, 4 ; increase by dword size mov eax, [rodd] ; whether r is on an odd or even dword cmp dword [skips], 0 jle .type2_shifts_32 sub dword [n3p], 4 ; point to previous dword mov esi, [n3p] add dword [steps], 1 ; one more step this time ; leave rp1 and doublesteps where they are sub dword [skips], 1 jmp .shifts_bottom_32 .type2_shifts_32: ; only gets executed once jl .type3_shifts_32 sub [steps], eax ; steps -= (0 or 1) add eax, 1 ; eax = 1 or 2 now sub [doublesteps], eax ; decrease double steps by 1 or 2 shl eax, 2 ; 1 byte = 1/4 dword add [rp1], eax ; add 1 or 2 dwords mov esi, edi add esi, 4 ; esi = edi + dword sub dword [skips], 1 ; make skips negative jmp .shifts_bottom_32 .type3_shifts_32: sub dword [steps], 1 sub dword [doublesteps], 2 add dword [rp1], 8 ; + two dwords mov esi, edi add 
esi, 4 ; esi = edi + dword .shifts_bottom_32: mov eax, [rp1] mov [rp2], eax ; start with rp2=rp1 mov eax, [doublesteps] mov [carry_steps], eax sub dword [i], 1 ja .top_outer_loop_32 ; All the middle terms have been multiplied. Now double it. push dword [bnlength] ; save bnlength mov eax, [rlength] mov [bnlength], eax ; r is of length rlength ; invoke double_a_bn, r mov ebx, dword [r] push ebx call double_a_bn add esp, 4 pop dword [bnlength] .skip_middle_terms_32: ; Now go back and add in the squared terms. ; In the following loops, the following pointers are used ; n1p(edi) = points to the parts of n being used ; rp1(esi) = points to part of doublebignumber used in outer loop ; ebx = points to part of doublebignumber for carry flag loop ; be careful, the next dozen or so lines are confusing! ; determine whether r is on an odd or even word in the number mov eax, [bnlength] shl eax, 1 ; double wide width sub eax, [rlength] ; 2*bnlength-rlength mov edx, eax ; save this for a moment and eax, 0004h ; check the odd sign bit mov esi, dword [r] ; load r pointer in esi add esi, eax ; depending on odd or even byte shr edx, 2 ; assumes dword size add edx, 1 and edx, 0FFFEh ; ~2+1, turn off last bit, mult of 2 shl edx, 2 mov edi, dword [n] ; load n1p pointer in edi add edi, edx mov eax, [bnlength] sub eax, edx shr eax, 2 ; 1 byte = 1/4 dword mov [i], eax shl eax, 1 ; double steps sub eax, 2 ; only 2*s-2 steps are really needed mov [doublesteps], eax mov [carry_steps], eax .top_outer_loop_squares_32: mov eax, dword [edi] mul eax ; square it mov ebx, esi add ebx, 4 ; increase by size of dword add dword [ebx-4], eax ; add low dword adc dword [ebx], edx ; add high dword jnc .no_more_carry_squares_32 ; carry loop not necessary mov ecx, [carry_steps] ; how many till end of double big number jcxz .no_more_carry_squares_32 add ebx, 4 ; move pointer to next dword ; loop until no more carry or until end of double big number .top_carry_loop_squares_32: add dword [ebx], 1 ; use add, not 
inc jnc .no_more_carry_squares_32 add ebx, 4 ; increase by size of dword loop .top_carry_loop_squares_32 .no_more_carry_squares_32: add edi, 4 ; increase by dword size add esi, 8 ; increase by 2*dword size sub dword [doublesteps], 2 ; reduce the carry steps needed mov eax, [doublesteps] mov [carry_steps], eax sub dword [i], 1 ja .top_outer_loop_squares_32 ; result is in r ; since it is a square, the result has to already be positive mov eax, dword [r] ; return r in eax .quit_proc: UNFRAME ebx, esi, edi ret ;;unsafe_square_bn ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n * u where u is an unsigned integer ; mult_bn_int PROC USES di si, r:bn_t, n:bn_t, u:dword ; r, n, & u passed in ebp + 8 ebp + 12 ebp + 16 CGLOBAL mult_bn_int mult_bn_int: %define r ebp + 8 %define n ebp + 12 %define u ebp + 16 ;;%define lu r9d ; LOCAL lu:dword ; long unsigned integer in 32 bit math FRAME ebx mov ecx, [bnlength] mov edi, dword [r] mov esi, dword [n] ; no need to clear r shr ecx, 2 ; byte = 1/4 dword sub ebx, ebx ; use ebx for temp holding carried dword sub eax, eax ; clear upper eax ;; mov eax, [u] ; convert u (unsigned int) ;; mov lu, eax ; to lu (long unsigned int) .top_loop_32: mov eax, dword [esi] ; load next dword from n mul dword [u] ; n * u add eax, ebx ; add last carried upper dword adc edx, 0 ; inc the carried dword if carry flag set mov ebx, edx ; save high dword in ebx mov dword [edi], eax ; save low dword add edi, 4 ; next dword in r add esi, 4 ; next dword in n ; loop .top_loop_32 sub ecx, 1 cmp ecx, 0 jg .top_loop_32 mov eax, dword [r] ; return r in eax UNFRAME ebx ret ;; mult_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r *= u where u is an unsigned integer ; mult_a_bn_int PROC USES di si, r:bn_t, u:dword ; r & u passed in ebp + 8 ebp + 12 CGLOBAL mult_a_bn_int mult_a_bn_int: %define r ebp + 8 %define u ebp + 12 FRAME ebx mov ecx, [bnlength] ; set outer loop counter mov esi, dword [r] ; no need 
to clear r shr ecx, 2 ; byte = 1/4 dword sub ebx, ebx ; use ebx for temp holding carried dword sub edi, edi ; clear upper edi mov edi, [u] ; save u in lower edi .top_loop_32: mov eax, dword [esi] ; load next dword from r mul edi ; r * u add eax, ebx ; add last carried upper dword adc edx, 0 ; inc the carried dword if carry flag set mov ebx, edx ; save high dword in ebx mov dword [esi], eax ; save low dword add esi, 4 ; next dword in r ; loop .top_loop_32 sub ecx, 1 cmp ecx, 0 jg .top_loop_32 mov eax, dword [r] ; return r in eax UNFRAME ebx ret ;; mult_a_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n / u where u is an unsigned integer ; unsafe_div_bn_int PROC USES di si, r:bn_t, n:bn_t, u:dword ; r, n, & u passed in ebp + 8 ebp + 12 ebp + 16 CGLOBAL unsafe_div_bn_int unsafe_div_bn_int: %define r ebp + 8 %define n ebp + 12 %define u ebp + 16 ; LOCAL sign:byte %define sign ebp - 8 FRAME ebx sub esp, 8 ; room for local variable "sign" ; check for sign bits mov ebx, dword [n] add ebx, [bnlength] mov al, byte [ebx-1] and al, 80h ; check the sign bit mov byte [sign], al jz .already_pos ; invoke neg_a_bn, n mov eax, dword [n] push eax ; pass n call neg_a_bn add esp, 4 .already_pos: mov ecx, [bnlength] ; set outer loop counter mov edi, dword [r] mov esi, dword [n] ; load pointers edi, esi ; past most significant portion of the number add esi, ecx add edi, ecx ; no need to clear r here, values get mov'ed, not add'ed shr ecx, 2 ; byte = 1/4 dword sub ebx, ebx ; clear upper word of ebx mov ebx, [u] ; need to start with most significant portion of the number sub esi, 4 ; most sig dword sub edi, 4 ; most sig dword sub edx, edx ; clear edx register ; for first time through loop .top_loop_32: mov eax, dword [esi] ; load next dword from n div ebx mov dword [edi], eax ; store low dword ; leave remainder in edx sub esi, 4 ; next dword in n sub edi, 4 ; next dword in r ; loop .top_loop_32 sub ecx, 1 cmp ecx, 0 jg .top_loop_32 cmp byte [sign], 
0 ; is result + or - ? je .pos_answer ; yes ; invoke neg_a_bn, r mov eax, dword [r] push eax ; pass r call neg_a_bn add esp, 4 .pos_answer: mov eax, dword [r] ; return r in eax UNFRAME ebx ret ;;unsafe_div_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r /= u where u is an unsigned integer ; div_a_bn_int PROC USES si, r:bn_t, u:word ; r & u passed in ebp + 8 ebp + 12 CGLOBAL div_a_bn_int div_a_bn_int: %define r ebp + 8 %define u ebp + 12 ;LOCAL sign:byte %define sign ebp - 8 FRAME ebx sub esp, 8 ; room for local variable "sign" mov ebx, dword [r] add ebx, [bnlength] mov al, byte [ebx-1] and al, 80h ; check the sign bit mov byte [sign], al jz .already_pos ; invoke neg_a_bn, r mov eax, dword [r] push eax ; pass r call neg_a_bn add esp, 4 .already_pos: mov ecx, [bnlength] ; set outer loop counter mov esi, dword [r] ; past most significant portion of the number add esi, ecx ; no need to clear r here, values get mov'ed, not add'ed shr ecx, 2 ; byte = 1/4 dword sub ebx, ebx ; clear upper word of ebx mov ebx, [u] ; need to start with most significant portion of the number sub esi, 4 ; most sig dword sub edx, edx ; clear edx register ; for first time through loop .top_loop_32: mov eax, dword [esi] ; load next dword from r div ebx mov dword [esi], eax ; store low dword ; leave remainder in edx sub esi, 4 ; next dword in r ; loop .top_loop_32 sub ecx, 1 cmp ecx, 0 jg .top_loop_32 cmp byte [sign], 0 ; is result + or - ? 
je .pos_answer ; yes ; invoke neg_a_bn, r mov eax, dword [r] push eax ; pass r call neg_a_bn add esp, 4 .pos_answer: mov eax, dword [r] ; return r in eax UNFRAME ebx ret ;; div_a_bn_int ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; bf_t routines ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = 0 (just like clear_bn() but loads bflength+2 instead of bnlength) ; clear_bf PROC USES di, r:bf_t ; r is passed in ebp + 8 CGLOBAL clear_bf clear_bf: %define r ebp + 8 push ebp ; if FRAME not used, do this mov ebp, esp mov ecx, [bflength] mov edi, dword [r] sub eax, eax ; clear eax shr ecx, 2 ; 1 byte = 1/4 dword rep stosd ; clear r, dword at a time stosw ; plus the exponent mov eax, dword [r] ; return r in eax mov esp, ebp ; if UNFRAME not used, do this pop ebp ret ;;clear_bf ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; r = n ; copy_bf PROC USES di si, r:bf_t, n:bf_t ; r & n passed in ebp + 8 ebp + 12 CGLOBAL copy_bf copy_bf: %define r ebp + 8 %define n ebp + 12 push ebp ; if FRAME not used, do this mov ebp, esp mov ecx, [bflength] add ecx, 2 mov edi, dword [r] mov esi, dword [n] shr ecx, 2 ; 1 byte = 1/4 dword rep movsd ; copy dword at a time movsw ; plus the exponent mov eax, dword [r] ; return r in eax mov esp, ebp ; if UNFRAME not used, do this pop ebp ret ;;copy_bf ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; LDBL bftofloat(bf_t n); ; converts a bf number to a 10 byte real ; ; bftofloat PROC USES di si, n:bf_t ; n is passed in ebp + 8 CGLOBAL bftofloat bftofloat: %define n ebp + 8 ; LOCAL value[11]:BYTE ; 11=10+1 ; pushed 4 bytes for saved registers %define value(x) [ebp - 20 + x] ; 20 = pushed + allocated local memory FRAME ebx sub esp, 16 ; room for local variable "value" mov ecx, 9 ; need up to 9 bytes cmp dword [bflength], 10 ; but no more than bflength-1 jae .movebytes_set mov ecx, [bflength] ; bflength is less than 10 sub ecx, 1 ; 
ecx=movebytes=bflength-1, 1 byte padding .movebytes_set: ; clear value sub eax, eax mov dword value(0), eax ; clear first 4 bytes mov dword value(4), eax ; clear next 4 bytes mov word value(8), ax ; clear next 2 bytes mov byte value(10), al ; clear last byte ; copy bytes from n to value lea edi, value(9) sub edi, ecx ; ecx holds movebytes mov ebx, [bflength] sub ebx, 1 sub ebx, ecx ; ecx holds movebytes mov esi, dword [n] add esi, ebx ; n+bflength-1-movebytes rep movsb mov bl, byte [esi] ; save sign byte, esi now points to it add esi, 1 ; point to exponent mov dx, word [esi] ; use dx as exponent shl dx, 3 ; 256^n = 2^(8n) ; adjust for negative values and bl, 10000000b ; determine sign jz .not_neg_32 neg dword value(0) ; take the negative of the 9 byte number cmc ; toggle carry flag not dword value(4) adc dword value(4), 0 not byte value(8) ; notice this last one is a byte adc byte value(8), 0 .not_neg_32: cmp byte value(8), 0 ; test for 0 jnz .top_shift_32 fldz jmp .return ; Shift until most signifcant bit is set. .top_shift_32: test byte value(8), 10000000b ; test msb jnz .bottom_shift_32 sub dx, 1 ; decrement exponent shl dword value(0), 1 ; shift left the 9 byte number rcl dword value(4), 1 rcl byte value(8), 1 ; notice this last one is byte ptr jmp .top_shift_32 .bottom_shift_32: ; round last byte cmp byte value(0), 80h ; jb .bottom ; no rounding necessary add dword value(1), 1 adc dword value(5), 0 jnc .bottom ; to get to here, the pattern was rounded from +FFFF... ; to +10000... 
with the 1 getting moved to the carry bit .rounded_past_end: mov byte value(8), 10000000b add dx, 1 ; adjust the exponent .bottom: ; adjust exponent add dx, 3FFFh+7 ; unbiased -> biased, + adjusted or dh, bl ; set sign bit if set mov word value(9), dx ; unlike float and double, long double is returned on fpu stack fld tword value(1) ; load return value .return: UNFRAME ebx ret ;;bftofloat endp ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; LDBL floattobf(bf_t n, LDBL f); ; converts a 10 byte real to a bf number ; ; floattobf PROC USES di si, n:bf_t, f:REAL10 ; n is passed in ebp + 8 ebp + 12 ; f is passed on the stack, pointed to by flt(x) CGLOBAL floattobf floattobf: %define n ebp + 8 %define flt(x) [ebp + 12 + x] ; LOCAL value[9]:BYTE ; 9=8+1 %define value(x) [ebp - 16 + x] ; 16 = allocated local memory push ebp ; if FRAME not used, do this mov ebp, esp sub esp, 16 ; room for local variable "value" ; invoke clear_bf, n mov eax, [n] push eax call clear_bf add esp, 4 ; check to see if f is 0 cmp byte flt(7), 0 ; f[7] can only be 0 if f is 0 ; jz return ; if f is 0, bailout now jnz .over_return jmp .return ; if f is 0, bailout now .over_return: mov ecx, 9 ; need up to 9 bytes cmp dword [bflength], 10 ; but no more than bflength-1 jae .movebytes_set mov ecx, [bflength] ; bflength is less than 10 sub ecx, 1 ; movebytes = bflength-1, 1 byte padding .movebytes_set: ; copy bytes from flt's mantissa to value mov byte value(0), 0 ; clear least sig byte mov eax, dword flt(0) mov dword value(1), eax mov eax, dword flt(4) mov dword value(5), eax ; get exponent in dx mov dx, word flt(8) ; location of exponent and dx, 7FFFh ; remove sign bit sub dx, 3FFFh+7 ; biased -> unbiased, + adjust ; Shift down until exponent is a mult of 8 (2^8n=256n) .top_shift_32: test dx, 111b ; expon mod 8 jz .bottom add dx, 1 ; increment exponent shr dword value(5), 1 ; shift right the 9 byte number rcr dword value(1), 1 rcr byte value(0), 1 ; notice this last one is a byte 
jmp .top_shift_32 .bottom: ; Don't bother rounding last byte as it would only make a difference ; when bflength < 9, and then only on the last bit. ; move data into place, from value to n lea esi, value(9) sub esi, ecx ; ecx holds movebytes mov edi, dword [n] add edi, [bflength] sub edi, 1 sub edi, ecx ; ecx holds movebytes rep movsb add edi, 1 sar dx, 3 ; divide expon by 8, 256^n=2^8n mov word [edi], dx ; store exponent ; get sign test byte flt(9), 10000000b ; test sign bit jz .not_negative ; invoke neg_a_bf, n mov eax, [n] push eax call neg_a_bf add esp, 4 .not_negative: .return: mov eax, dword [n] ; return r in eax mov esp, ebp ; if UNFRAME not used, do this pop ebp ret ;;floattobf endp ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; LDBL bntofloat(bf_t n); ; converts a bn number to a 10 byte real ; (the most speed critical of these to/from float routines) ;bntofloat PROC USES di si, n:bn_t ; n is passed in ebp + 8 CGLOBAL bntofloat bntofloat: %define n ebp + 8 ; LOCAL value[11]:BYTE ; 11=10+1 %define value(x) [ebp - 20 + x] %define value9 ebp - 20 + 9 ; 20 = pushed ebx + allocated local memory FRAME ebx sub esp, 16 ; room for local variable "value" ; determine the most significant byte, not 0 or FF mov esi, dword [n] sub esi, 1 add esi, [bnlength] ; n+bnlength-1 mov bl, byte [esi] ; top byte mov ecx, [bnlength] ; initialize ecx with full bnlength cmp bl, 0 ; test top byte against 0 je .determine_sig_bytes cmp bl, 0FFh ; test top byte against -1 jne .sig_bytes_determined .determine_sig_bytes: sub ecx, 1 ; now bnlength-1 .top_sig_byte: sub esi, 1 ; previous byte cmp byte [esi], bl ; does it have the right stuff? jne .sig_bytes_determined ; (ie: does it match top byte?) ; loop top_sig_byte ; decrement ecx and repeat sub ecx, 1 cmp ecx, 0 jg .top_sig_byte ; At this point, it must be 0 with no sig figs at all ; or -1/(256^bnlength), one bit away from being zero. cmp bl, 0 ; was it zero? 
jnz .not_zero ; no, it was a very small negative ; yes fldz ; return zero jmp .return .not_zero: mov eax, [intlength] sub eax, [bnlength] shl eax, 3 ; 256^n=2^8n, now more like movebits add ax, 3FFFh+0 ; bias, no adjustment necessary or ah, 10000000b ; turn on sign flag mov word value(9), ax ; store exponent mov word value(7), 8000h ; store mantissa of 1 in most sig bit ; clear rest of value that is actually used mov dword value(1), 0 mov word value(5), 0 fld tword value(1) jmp .return .sig_bytes_determined: mov edx, ecx ; save in edx for later cmp ecx, 9-1 ; no more than ecx bytes jb .set_movebytes mov ecx, 9-1 ; up to 8 bytes .set_movebytes: ; ecx now holds movebytes ; esi still points to most non-0 sig byte sub esi, ecx ; esi now points to first byte to be moved add ecx, 1 ; can be up to 9 ; clear value mov dword value(0), 0 mov dword value(4), 0 mov word value(8), 0 mov byte value(10), 0 ; copy bytes from n to value ; es:si still holds first move byte of n lea edi, [value9] sub edi, ecx ; ecx holds movebytes ; value[9] is in edi, first move byte of n is now in esi rep movsb ; adjust for negative values xor eax, eax ; use ax as a flag ; get sign flag ; top byte is still in bl and bl, 10000000b ; determine sign jz .not_neg_32 neg dword value(0) ; take the negative of the 9 byte number cmc ; toggle carry flag not dword value(4) adc dword value(4), 0 not byte value(8) ; notice this last one is a byte adc byte value(8), 0 jnc .not_neg_32 ; normal mov byte value(8), 10000000b ;n was FFFF...0000... add eax, 1 ; set eax to 1 to flag this special case .not_neg_32: sub edx, [bnlength] ; adjust exponent add edx, [intlength] ; adjust exponent shl edx, 3 ; 256^n=2^8n add edx, eax ; see special case above ; Shift until most signifcant bit is set. 
.top_shift_32: test byte value(8), 10000000b ; test msb jnz .bottom sub edx, 1 ; decrement exponent shl dword value(0), 1 ; shift left the 9 byte number rcl dword value(4), 1 rcl byte value(8), 1 ; notice this last one is a byte jmp .top_shift_32 ; don't bother rounding, not really needed while speed is. .bottom: ; adjust exponent add dx, 3FFFh+7-8 ; unbiased -> biased, + adjusted or dh, bl ; set sign bit if set mov word value(9), dx ; unlike float and double, long double is returned on fpu stack fld tword value(1) ; load return value .return: UNFRAME ebx ret ;;bntofloat endp ; ; LDBL floattobn(bf_t n, LDBL f) is in BIGNUM.C ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; These last two functions do not use bignum type numbers, but take ; long doubles as arguments. These routines are called by the C code. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; LDBL extract_256(LDBL f, int *exp_ptr) ; ; extracts the mantissa and exponant of f ; finds m and n such that 1<=|m|<256 and f = m*256^n ; n is stored in *exp_ptr and m is returned, sort of like frexp() ; extract_256 PROC f:real10, exp_ptr: ptr sword ; f & exp_ptr passed ebp + 8 ebp + 20 CGLOBAL extract_256 extract_256: %define f [ebp + PTRSZ + PTRSZ] ; 8 = return address + saved rbp %define exp_ptr [ebp + 20] ; local expon:sword, exf:real10, tmp_word:word ; location = pushed ebx + room for local variables = 36 %define expon [ebp - 36] %define exf [ebp - 36 + 8] %define tmp_word [ebp - 36 + 24] FRAME ebx sub esp, 32 ; room for local variables fld tword f ; f ftst ; test for zero fstsw word tmp_word fwait mov ax, word tmp_word sahf jnz .not_zero ; proceed mov ebx, exp_ptr mov dword [ebx], 0 ; save = in *exp_ptr jmp .bottom ; f, which is zero, is already on stack .not_zero: ; f is already on stack fxtract ; mant exp, where f=mant*2^exp fxch ; exp mant fistp word expon ; mant fwait mov ax, expon mov dx, 
ax ; make copy for later use cmp ax, 0 jge .pos_exp ; jump if exp >= 0 ; exp is neg, adjust exp add ax, 8 ; exp+8 .pos_exp: ; adjust mantissa and ax, 7 ; ax mod 8 jz .adjust_exponent ; don't bother with zero adjustments mov word expon, ax ; use expon as a temp var fild word expon ; exp mant fxch ; mant exp fscale ; mant*2^exp exp fstp st1 ; mant*2^exp (store in 1 and pop) .adjust_exponent: sar dx, 3 ; exp / 8 mov ebx, exp_ptr mov word [ebx], dx ; save in *exp_ptr fwait .bottom: ; unlike float and double, long double is returned on fpu stack UNFRAME ebx ret ;; extract_256 ENDP ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; LDBL scale_256( LDBL f, int n ); ; calculates and returns the value of f*256^n ; sort of like ldexp() ; ; n must be in the range -2^12 <= n < 2^12 (2^12=4096), ; which should not be a problem ; scale_256 PROC f:real10, n: sword ; f & n passed ebp + 8 ebp + 20 CGLOBAL scale_256 scale_256: %define f [ebp + PTRSZ + PTRSZ] ; 8 = return address + pushed ebp %define n [ebp + 20] cmp dword n, 0 jne .non_zero fld tword f jmp .bottom ; don't bother with scales of zero .non_zero: shl dword n, 3 ; 8n fild dword n ; 8n fld tword f ; f 8n fscale ; new_f=f*2^(8n)=f*256^n 8n fstp st1 ; new_f .bottom: ; unlike float and double, long double is returned on fpu stack ret ;;scale_256 ENDP