|
Во всех случаях включена оптимизация по скорости.
---------- TMS320C62xx / 67xx (int 32 bit, long 40 bit)
---------- CCS v2.2
1 такт на одну итерацию суммирования.
;******************************************************************************
;* FUNCTION NAME: _mean *
;* *
;* Regs Modified : A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,B0,B1,B2,B3,B4,B5,B6, *
;* B7,B8,B9,SP *
;* Regs Used : A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,B0,B1,B2,B3,B4,B5,B6, *
;* B7,B8,B9,SP *
;* Local Frame Size : 0 Args + 0 Auto + 4 Save = 4 byte *
;******************************************************************************
;* Purpose: accumulate 32-bit words into the register pair A1:A0 and hand
;*          the total to __divli ("return (int)(sum/(long)count)").
;* In:      A4 = address of the first word (copied to A3, walked by LDW *A3++)
;*          B4 = element count (copied to B5; B5-1 seeds the counters)
;*          B3 = return address (saved on the stack around the __divli call)
;* Loop:    software-pipelined, one kernel execute packet per element.
;*          B0 counts the remaining branches, A2 the remaining loads,
;*          B1 (starts at 4) keeps the ADD switched off until it reaches 0.
;* Note:    every instruction stands on its own line; a leading "||" joins an
;*          instruction to the execute packet of the line before it.
;******************************************************************************
_mean:
           STW     .D2T2   B3,*SP--(8)       ; |105| save return address
           MV      .D1     A4,A3             ; A3 = running data pointer
||         MV      .D2     B4,B5             ; B5 = count
           MVK     .S2     0x4,B1            ; init prolog collapse predicate
L17:    ; PIPED LOOP PROLOG
           B       .S2     L18               ; (P) |110|
           B       .S2     L18               ; (P) @|110|
           B       .S2     L18               ; (P) @@|110|
           SUB     .D2     B5,1,B0           ; B0 = count - 1 (branch counter)
||         B       .S2     L18               ; (P) @@@|110|
           SUB     .S1X    B5,1,A2           ; A2 = count - 1 (load counter)
||         ZERO    .L1     A1:A0             ; |107| sum = 0
||         LDW     .D1T1   *A3++,A4          ; (P) |109| first load
|| [ B0]   SUB     .D2     B0,1,B0           ; (P) @@@@@|110|
|| [ B0]   B       .S2     L18               ; (P) @@@@|110|
;** --------------------------------------------------------------------------*
L18:    ; PIPED LOOP KERNEL
   [ B1]   SUB     .D2     B1,1,B1           ; count the collapse predicate down
|| [ A2]   SUB     .S1     A2,1,A2           ; one load fewer to issue
|| [!B1]   ADD     .L1     A4,A1:A0,A1:A0    ; |109| sum += loaded word (only once B1 == 0)
|| [ A2]   LDW     .D1T1   *A3++,A4          ; @@@@@|109| next word
|| [ B0]   B       .S2     L18               ; @@@@@|110|
|| [ B0]   SUB     .L2     B0,1,B0           ; @@@@@@|110|
;** --------------------------------------------------------------------------*
L19:    ; PIPED LOOP EPILOG
;** 111 ----------------------- return (int)(sum/(long)count);
           .line   8
           NOP             1
           MV      .D1     A1,A5             ; |111| A5 = high part of sum
||         ZERO    .D2     B5                ; B5 = 0 (B4 still holds the count)
||         B       .S2     __divli           ; |111|
           MVKL    .S2     RL2,B3            ; |111| B3 = RL2, where __divli returns to
           MVKH    .S2     RL2,B3            ; |111|
           MV      .D1     A0,A4             ; |111| A4 = low part of sum
           NOP             2
RL2:    ; CALL OCCURS ; |111|
           .line   9
           NOP             1
           LDW     .D2T2   *++SP(8),B3       ; |112| restore return address
           NOP             4
           B       .S2     B3                ; |112| return to caller
           NOP             5
           ; BRANCH OCCURS ; |112|
           .endfunc 112,000080000h,8
---------- TMS320VC55xx
---------- CCS v2.2
1 такт на одну итерацию суммирования.
;*******************************************************************************
;* FUNCTION NAME: _mean *
;*******************************************************************************
;* Purpose: add up the values read through AR0 into accumulator AC0, then
;*          call __divli and return the low word of AC0 in T0.
;* In:      AR0 = address of the first element (post-incremented by the ADD)
;*          T0  = element count (T0-1 is loaded into CSR for RPT)
;* Out:     T0  = copy of AC0 after __divli returns
;* Note:    every instruction stands on its own line; a leading "||" marks
;*          an instruction executed in parallel with the line before it.
;*******************************************************************************
_mean:
        AADD #-1, SP
        SUB #1, T0, AR1                 ; AR1 = count - 1
        MOV AR1, CSR                    ; repeat counter for the RPT below
        RPT CSR                         ; repeat the ADD that follows
||      MOV #0, AC0                     ; |5| sum = 0
        ADD *AR0+, AC0, AC0             ; |7| sum += *p++
        AND #0xffff, T0, AC1            ; |9| AC1 = count with upper bits masked off
        CALL #__divli                   ; |9|
                                        ; call occurs [#__divli] ; |9|
        MOV AC0, T0                     ; |9| result goes back in T0
        AADD #1, SP                     ; |9|
        RET                             ; |9|
----------- MSP430
----------- MSP FET 3.04
Слишком много тактов на итерацию: в теле цикла 9 команд.
; mean -- 32-bit sum of 16-bit words, followed by a call to ?SL_DIVMOD_L03.
;   In:  R12 = address of the first word (auto-incremented while reading)
;        R14 = element count (loop counter; a copy is kept in R15)
;   Uses R8-R11 as scratch; all four are pushed on entry and popped on exit.
;   The result is left wherever ?SL_DIVMOD_L03 puts it; nothing is moved
;   after the call.
mean:
        PUSH    R10
        PUSH    R11
        PUSH    R8
        PUSH    R9
        CLR     R11                     ; R11:R10 = running sum = 0
        CLR     R10
        MOV     R14,R15                 ; keep the count for the division
mean_sum:
        MOV     @R12+,R8                ; R8 = next word
        MOV     R8,R9
        INV     R9                      ; R9 = ~word
        RLA     R9                      ; carry = top bit of ~word
        SUBC    R9,R9                   ; R9 = 0 if word >= 0, 0FFFFh if word < 0
        ADD     R8,R10                  ; low half of sum += word
        ADDC    R9,R11                  ; high half += sign word + carry
        ADD     #-1,R14
        JNZ     mean_sum                ; until the counter reaches zero
        MOV     R15,R14                 ; R15:R14 = 0:count
        CLR     R15
        MOV     R10,R12                 ; R13:R12 = sum
        MOV     R11,R13
        CALL    #?SL_DIVMOD_L03
        POP     R9
        POP     R8
        POP     R11
        POP     R10
        RET
E-mail: info@telesys.ru