/*
 * muls16x16_32 (Ver. 1.0)
 * Author: Nicholas Lombardo
 * Date: December 19, 2014
 * Test the subroutine muls16x16_32 which will multiply two 16-bit signed numbers
 *
 * Flow:
 *   1. For each of the 4 (A, B) pairs in `table` (flash): copy the pair to
 *      r23:r22 / r21:r20 and to the SRAM variables A and B, call
 *      muls16x16_32, then push the 4 result bytes (MSByte first) followed
 *      by one zero "spacer" byte.
 *   2. Copy the 20 pushed bytes, first-pushed first, into r0..r19 so that
 *      all four products can be inspected in the register file.
 *   3. Zero r19..r26 and spin forever.
 */

// NOTE(review): the file name after .INCLUDE was missing in this copy of the
// source. m328Pdef.inc is inferred from the "0x08FF" top-of-SRAM comment
// below - confirm it matches the actual target device.
.INCLUDE "m328Pdef.inc"

.DSEG
A:  .BYTE 2                     // copy of the current multiplicand (low byte first)
B:  .BYTE 2                     // copy of the current multiplier (low byte first)
C:  .BYTE 4                     // reserved; nothing in this program writes to it

.CSEG
    // The read-back section below starts at ramend, so make sure the stack
    // really starts there instead of relying on the reset value of SP.
    ldi  r16, high(ramend)
    out  SPH, r16
    ldi  r16, low(ramend)
    out  SPL, r16

    // A and B values are stored in flash program memory in a table
    ldi  r16, 4
    mov  r2, r16                // r2 counts iterations (4 pairs to check)
    ldi  ZH, high(table<<1)     // Z = byte address of the table in flash
    ldi  ZL, low(table<<1)
    clr  r25                    // r25 = 0x00: spacer byte and zero source.
                                // (Must not be r31: that is ZH, which lpm
                                // still needs while the table is being read.)

// Variable loading: indirect transfer from table to A/B
loading:
    lpm  r22, Z+                // A low byte
    lpm  r23, Z+                // A high byte
    sts  A+1, r23
    sts  A, r22
    lpm  r20, Z+                // B low byte
    lpm  r21, Z+                // B high byte
    sts  B+1, r21
    sts  B, r20

// Calculate the product A*B and push it to the stack
loop:
    rcall muls16x16_32          // r19:r18:r17:r16 = A*B
    push r19                    // save result on stack, MSByte first
    push r18
    push r17
    push r16
    push r25                    // zero spacer between results
    dec  r2                     // dec counter, load next pair
    brne loading

    // After 4 pairs are tested, copy the results to the GPRs for review.
    // Rather than popping, the Y pointer walks the stack area downwards
    // from the top of SRAM (ramend) to the last pushed byte, and the X
    // pointer stores each byte into the register file (data addresses
    // 0x0000.. map to r0..), so the first byte pushed ends up in r0
    // (FIFO ordering). r21 is the transfer register.
    ldi  YH, high(ramend)       // Y loads from the top of SRAM (stack)
    ldi  YL, low(ramend)
    clr  XH                     // X stores to the GPRs (0x0001 = r1)
    ldi  XL, 0x01
    ld   r0, Y                  // first pushed byte -> r0
    in   r20, SPL
    inc  r20                    // r20 = low(SP + 1) = address of the last pushed byte
                                // (only 20 bytes are on the stack, so the low
                                // byte alone is enough to compare)
reload:
    ld   r21, -Y                // indirect load from SRAM stack (top --> bottom)
    st   X+, r21                // indirect store to GPRs
    cp   r20, YL                // unsigned compare: addresses are not signed
    brlo reload                 // continue while Y is still above the last pushed byte

    // Clear the other registers for legibility
    ldi  XL, 19                 // start at r19
clear:
    st   X+, r25                // r25 = 0x00
    cpi  XL, 0x1A               // stop once X points at r26 (XL itself)
    brne clear
    clr  r26

// Program done
end:
    rjmp end

;-----------------------------------------------
// Result layout in the register file after the program has finished:
//        (MSByte)          (LSByte)
// 1.     r0:   r1:   r2:   r3
// 2.     r5:   r6:   r7:   r8
// 3.     r10:  r11:  r12:  r13
// 4.     r15:  r16:  r17:  r18

// Table of values for A and B
//        A        B
table:
    .DW 8592, -12643            // 1. A * B = -108628656 (F9 86 75 50)
    .DW -1333, 1493             // 2.         -1990169   (FF E1 A1 E7)
    .DW 759, 15                 // 3.         11385      (00 00 2C 79)
    .DW 0x1074, 0x87B2          // 4. 0x1074 = 4212, 0x87B2 = -30798 as signed 16-bit
//                                 4212 * -30798 = -129721176 (F8 44 9C A8)

/*
 * muls16x16_32 - signed multiply of two 16-bit numbers, 32-bit result.
 *
 * Inputs:   r23:r22  multiplicand (ah:al), left unchanged
 *           r21:r20  multiplier   (bh:bl), left unchanged
 * Outputs:  r19:r18:r17:r16  product, r19 = most significant byte
 * Clobbers: r0, r1 (written by every multiply instruction) and the SREG
 *           flags. r2 is used as a zero constant but is saved on entry
 *           and restored on exit (one byte of stack).
 *
 * Method: product = (ah*bh << 16) + (ah*bl << 8) + (bh*al << 8) + al*bl,
 * where the high bytes are treated as signed and the low bytes as unsigned.
 */

// Add one signed-by-unsigned cross product, shifted left by 8 bits, into the
// running result in r19:r18:r17:r16.
//   @0 = signed high byte of one operand
//   @1 = unsigned low byte of the other operand
// Expects r2 == 0.
.MACRO muls16_add_cross
    mulsu @0, @1                // r1:r0 = (signed)@0 * (unsigned)@1, C = sign of r1:r0
    sbc   r19, r2               // sign-extend the partial product into the top byte
    add   r17, r0
    adc   r18, r1
    adc   r19, r2               // propagate the final carry
.ENDMACRO

muls16x16_32:
    push  r2
    clr   r2                    // r2 = 0 for the carry/borrow-only sbc/adc steps

    muls  r23, r21              // (signed)ah * (signed)bh
    movw  r19:r18, r1:r0        //   -> upper word of the result
    mul   r22, r20              // al * bl
    movw  r17:r16, r1:r0        //   -> lower word of the result

    muls16_add_cross r23, r20   // += ((signed)ah * bl) << 8
    muls16_add_cross r21, r22   // += ((signed)bh * al) << 8

    pop   r2
    ret