; This subroutine calculates the square root of the 32-bit value in r5:r4:r3:r2. The result is 16 bits wide, returned in r17:r16, and rounded to the nearest integer (0.5 rounds up).
; Cycles, including call and return: 271 - 316.
; R17:R16 = sqrt(R5:R4:R3:R2)
; sqrt32 - square root of a 32-bit value, built one result bit per loop pass
; and rounded at the end.
;
; In:    R5:R4:R3:R2 = radicand (R5 is the most significant byte). All
;        comparisons branch on carry, i.e. the value is treated as unsigned.
; Out:   R17:R16 = root; the last two adc instructions add a final carry as
;        the round-up step.
; Kept:  R18, R19 (pushed on entry, popped before ret).
; Lost:  R5:R4:R3:R2 are shifted and subtracted from in place; the status
;        flags are changed and never restored.
; Stack: two bytes (R18, R19) in addition to the return address.
;
; NOTE(review): a second, identical sqrt32 definition follows this one in the
; file; two definitions of the same labels will clash at assembly time -
; confirm which copy is meant to stay.
sqrt32:
push R18
push R19
; R19:R18 = 0xC000: two adjacent mask bits that slide right once per pass.
ldi R19,0xc0
clr R18
; R17:R16 = 0x4000: the first trial value. "sub R16,R16" zeroes R16 and also
; leaves carry clear, so the brcs at the loop top falls through on pass one.
ldi R17,0x40
sub R16,R16
_sq32_1:
; On later passes carry is the bit that "rol R5" pushed out at the bottom of
; the loop (sbrs and rjmp do not change flags). If it is set, the subtraction
; below is done without comparing first.
brcs _sq32_2
; Otherwise subtract only when R5:R4 >= R17:R16 (unsigned compare).
cp R4,R16
cpc R5,R17
brcs _sq32_3
_sq32_2:
; Take the trial value off the upper word, then OR both mask bits into the
; result.
sub R4,R16
sbc R5,R17
or R16,R18
or R17,R19
_sq32_3:
; Slide the mask one place right; the bit leaving R18 ends up in carry.
lsr R19
ror R18
; Toggle the result under the moved mask: the old trial bit is cleared and
; the next lower one is set, while the upper bit set by the OR above (if the
; subtraction ran) stays in place. eor does not touch carry, so the bit from
; "ror R18" is still there for the rotate below.
eor R17,R19
eor R16,R18
; Rotate R5:R4:R3:R2 left one bit through carry. The bit that left the mask
; enters R2 bit 0; the bit leaving R5 is the carry tested at _sq32_1.
rol R2
rol R3
rol R4
rol R5
; R2 bit 0 stays 0 until the mask has shrunk to 0x0003, so starting from
; 0xC000 the loop body runs 15 times before sbrs skips the jump back.
sbrs R2,0
rjmp _sq32_1
; After the loop the mask is 0x0001, so R19 = 0 from here on. Carry is still
; the bit rotated out of R5 by the last pass.
brcs _sq32_4
; No bit rotated out: skip the subtraction when R17:R16 >= R5:R4.
cp R16,R4
cpc R17,R5
brcc _sq32_5
_sq32_4:
; Carry is set on both ways into here (taken brcs above, or borrow from the
; compare), so this chain subtracts R17:R16:R19 plus one from R5:R4:R3; with
; R19 = 0 the low byte only loses the carry.
sbc R3,R19
sbc R4,R16
sbc R5,R17
; R16 bit 0 was cleared by the last eor in the loop, so inc sets the lowest
; result bit.
inc R16
_sq32_5:
; Shift R5:R4:R3 left one more bit for the rounding decision.
lsl R3
rol R4
rol R5
; A bit shifted out of R5 forces the round-up (carry already set).
brcs _sq32_6
; Otherwise the compare sets carry only when R17:R16 < R5:R4.
cp R16,R4
cpc R17,R5
_sq32_6:
; R19 is 0, so these two adds only propagate carry: +1 when rounding up.
; NOTE(review): if R17:R16 is 0xFFFF and carry is set here, the result wraps
; to 0x0000; pop and ret leave flags alone, so that carry is still visible
; to the caller on return - confirm whether callers can pass radicands that
; large and whether they check carry.
adc R16,R19
adc R17,R19
pop R19
pop R18
ret
; Cycles, including call and return: 271 - 316.
; R17:R16 = sqrt(R5:R4:R3:R2)
; sqrt32 - square root of a 32-bit value, built one result bit per loop pass
; and rounded at the end.
;
; In:    R5:R4:R3:R2 = radicand (R5 is the most significant byte). All
;        comparisons branch on carry, i.e. the value is treated as unsigned.
; Out:   R17:R16 = root; the last two adc instructions add a final carry as
;        the round-up step.
; Kept:  R18, R19 (pushed on entry, popped before ret).
; Lost:  R5:R4:R3:R2 are shifted and subtracted from in place; the status
;        flags are changed and never restored.
; Stack: two bytes (R18, R19) in addition to the return address.
;
; NOTE(review): an identical sqrt32 definition already appears earlier in the
; file; two definitions of the same labels will clash at assembly time -
; confirm which copy is meant to stay.
sqrt32:
push R18
push R19
; R19:R18 = 0xC000: two adjacent mask bits that slide right once per pass.
ldi R19,0xc0
clr R18
; R17:R16 = 0x4000: the first trial value. "sub R16,R16" zeroes R16 and also
; leaves carry clear, so the brcs at the loop top falls through on pass one.
ldi R17,0x40
sub R16,R16
_sq32_1:
; On later passes carry is the bit that "rol R5" pushed out at the bottom of
; the loop (sbrs and rjmp do not change flags). If it is set, the subtraction
; below is done without comparing first.
brcs _sq32_2
; Otherwise subtract only when R5:R4 >= R17:R16 (unsigned compare).
cp R4,R16
cpc R5,R17
brcs _sq32_3
_sq32_2:
; Take the trial value off the upper word, then OR both mask bits into the
; result.
sub R4,R16
sbc R5,R17
or R16,R18
or R17,R19
_sq32_3:
; Slide the mask one place right; the bit leaving R18 ends up in carry.
lsr R19
ror R18
; Toggle the result under the moved mask: the old trial bit is cleared and
; the next lower one is set, while the upper bit set by the OR above (if the
; subtraction ran) stays in place. eor does not touch carry, so the bit from
; "ror R18" is still there for the rotate below.
eor R17,R19
eor R16,R18
; Rotate R5:R4:R3:R2 left one bit through carry. The bit that left the mask
; enters R2 bit 0; the bit leaving R5 is the carry tested at _sq32_1.
rol R2
rol R3
rol R4
rol R5
; R2 bit 0 stays 0 until the mask has shrunk to 0x0003, so starting from
; 0xC000 the loop body runs 15 times before sbrs skips the jump back.
sbrs R2,0
rjmp _sq32_1
; After the loop the mask is 0x0001, so R19 = 0 from here on. Carry is still
; the bit rotated out of R5 by the last pass.
brcs _sq32_4
; No bit rotated out: skip the subtraction when R17:R16 >= R5:R4.
cp R16,R4
cpc R17,R5
brcc _sq32_5
_sq32_4:
; Carry is set on both ways into here (taken brcs above, or borrow from the
; compare), so this chain subtracts R17:R16:R19 plus one from R5:R4:R3; with
; R19 = 0 the low byte only loses the carry.
sbc R3,R19
sbc R4,R16
sbc R5,R17
; R16 bit 0 was cleared by the last eor in the loop, so inc sets the lowest
; result bit.
inc R16
_sq32_5:
; Shift R5:R4:R3 left one more bit for the rounding decision.
lsl R3
rol R4
rol R5
; A bit shifted out of R5 forces the round-up (carry already set).
brcs _sq32_6
; Otherwise the compare sets carry only when R17:R16 < R5:R4.
cp R16,R4
cpc R17,R5
_sq32_6:
; R19 is 0, so these two adds only propagate carry: +1 when rounding up.
; NOTE(review): if R17:R16 is 0xFFFF and carry is set here, the result wraps
; to 0x0000; pop and ret leave flags alone, so that carry is still visible
; to the caller on return - confirm whether callers can pass radicands that
; large and whether they check carry.
adc R16,R19
adc R17,R19
pop R19
pop R18
ret