;========================================================== ;Useful Routines ;========================================================== ; FloatSqrt_80 ; NewtonIter_sqrt ;========================================================== ;Special case square root ;========================================================== ; sqrt(0) = 0 ; sqrt(NAN) = NAN ; sqrt(inf) = inf ;========================================================== FloatSqrt_80: ; Newton's Iteration for Square Roots call LoadFPOP1 fpOP1_sqrt: ld hl,(fpOP1+12) bit 7,h \ ret nz ld a,h \ or a \ jr nz,$+4 \ or l \ ret z sub 40h \ rra \ rr l \ ld h,a push hl ld hl,4000h rl l ld (fpOP1+12),hl ld de,temp1 call fpOP1_to ;First iteration ;Combining my algorithm's first iteration to get a decent approximation for newton's method. ; if fpOP1<2, use my algo, else rotate in a 1 bit and decrement the exponent ;for most values, it takes off 2 iterations, but for some clusters it only saves 1 Newton iteration ld hl,(fpOP1+12) dec l \ jr nz,newton_sqrt_approx ld (fpOP1+12),hl ld hl,fpOP1+11 sra (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ jp sqrt_iter newton_sqrt_approx: ld hl,fpOP1+11 call clzfpart jr z,endsqrt ;need to perform fpOP1-fpOP1>>(a+1) ;as well as add 1+1/(2^(a+1)-1) which is 1.00...0100...0100... where there are "a" zeroes between each bit, then divide by 2 inc a push af inc a ld hl,fpOP1+11 call rshift ;stored to outp128 ld de,fpOP1+4 ld hl,outp128+8 call sub_64b pop af ld hl,fpOP2+11 call bitunroll_64b ld hl,fpOP2+4 ld de,fpOP1+4 call add_64b ex de,hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) \ dec hl rr (hl) ; ld hl,const_1 ; call LoadfpOP2 ; call fpOP1_Add_fpOP2 ; ld hl,(fpOP1+12) ; dec hl ; ld (fpOP1+12),hl ;4 of these iterations gets 56 bits of precision. ;So close, but full precision is necessary. sqrt_iter: call NewtonIter_sqrt call NewtonIter_sqrt call NewtonIter_sqrt call NewtonIter_sqrt ; call NewtonIter_sqrt endsqrt: pop de ld hl,(fpOP1+12) add hl,de ld (fpOp1+12),hl ret NewtonIter_sqrt: ; temp has the value of which to find the square root ; fpOP1 has the last value call fpOP1_to_Ans ld hl,temp1+4 call LoadFPOP2 call fpOP2_div_fpOP1 ld hl,Ans ld de,fpOP3+4 call FloatAdd_80 ld hl,(fpOP1+12) dec hl ld (fpOP1+12),hl ret clzfpart: ;count the number of leading zeroes after the first 1 bit ld e,8 ld a,(hl) \ add a,a \ jr nz,clzbyte dec e \ ret z \ dec hl \ or (hl) \ jr z,$-4 clzbyte: sla e \ sla e \ sla e dec e add a,a jr nc,$-2 ld a,63 \ sub e inc e ; in order to ensure nz ret rshift: ;a is the number of bits to right shift ;hl points to the data to shift ;shifted result in outp128 ld c,a ld de,outp128+15 and $F8 jr z,shiftbits rra \ rra \ rra ld b,a xor a ld (de),a dec de djnz $-2 shiftbits: ld a,64 sub c \ and $F8 \ rra \ rra \ rra \ inc a ld b,c ld c,a ld a,b ld b,0 push bc push de lddr pop hl pop bc and 7 ret z push hl \ or a \ ld b,c ;number of bytes to shift rr (hl) \ dec hl \ djnz $-3 pop hl \ dec a \ jr nz,$-10 ret bitunroll_64b: ;A is the number of 0 bits between 1s ld c,a cp 8 \ jp nc,arbu dec a \ jp m,bu_0 jr z,bu_1 dec a \ jr z,bu_2 dec a \ jr z,bu_3 dec a \ jr z,bu_4 dec a \ jr z,bu_5 dec a \ jr z,bu_6 dec a \ jr z,bu_7 bu_7: ld a,80h jp bu_0+1 bu_1: ld a,$AB bu_0: dec a ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ dec hl ld (hl),a \ ret bu_6: ld (hl),%10000001 \ dec hl ld (hl),%00000010 \ dec hl ld (hl),%00000100 \ dec hl ld (hl),%00001000 \ dec hl ld (hl),%00010000 \ dec hl ld (hl),%00100000 \ dec hl ld (hl),%01000000 \ dec hl ld (hl),%10000001 \ dec hl bu_5: ld (hl),%10000010 \ dec hl ld (hl),%00001000 \ dec hl ld (hl),%00100000 \ dec hl ld (hl),%10000010 \ dec hl ld (hl),%00001000 \ dec hl ld (hl),%00100000 \ dec hl ld (hl),%10000010 \ dec hl ld (hl),%00001000 \ ret bu_4: ld (hl),%10000100 \ dec hl ld (hl),%00100001 \ dec hl ld (hl),%00001000 \ dec hl ld (hl),%01000010 \ dec hl ld (hl),%00010000 \ dec hl ld (hl),%10000100 \ dec hl ld (hl),%00100001 \ dec hl ld (hl),%00001000 \ ret bu_3: ld a,$88 jp bu_0+1 bu_2: ld (hl),%10010010 \ dec hl ld (hl),%01001001 \ dec hl ld (hl),%00100100 \ dec hl ld (hl),%10010010 \ dec hl ld (hl),%01001001 \ dec hl ld (hl),%00100100 \ dec hl ld (hl),%10010010 \ dec hl ld (hl),%01001001 \ ret arbu: ;okay, enough of the custom ones ld (hl),80h \ dec hl dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ jr nc,$+10 \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a dec hl \ ld b,a \ ld (hl),0 \ sub 8 \ ret nc \ add a,c \ ld d,a \ ld a,1 \ inc b \ rrca \ djnz $-1 \ ld (hl),a \ ret .echo "FloatSqrt :",$-FloatSqrt_80