mailer | 7 Oct 21:39
Favicon

amiconn: r18736 - trunk/apps/codecs/demac/libdemac

Date: 2008-10-07 21:40:17 +0200 (Tue, 07 Oct 2008)
New Revision: 18736

Log Message:
APE: Further ARMv6 filter optimisations: Save 4 'ror's per round by utilising the shift feature of the
'pack halfword' instructions in the unaligned vector addition/subtraction, better pipelining in the
aligned scalarproduct(), and a new method to calculate the unaligned scalarproduct().

Modified:
   trunk/apps/codecs/demac/libdemac/vector_math16_armv6.h

Modified: trunk/apps/codecs/demac/libdemac/vector_math16_armv6.h
===================================================================
--- trunk/apps/codecs/demac/libdemac/vector_math16_armv6.h	2008-10-07 19:37:33 UTC (rev 18735)
+++ trunk/apps/codecs/demac/libdemac/vector_math16_armv6.h	2008-10-07 19:40:17 UTC (rev 18736)
@@ -39,36 +39,33 @@

     "10:                             \n"
         "ldrh    r4, [%[v2]], #2     \n"
+        "mov     r4, r4, lsl #16     \n"
     "1:                              \n"
         "ldmia   %[v2]!, {r5-r8}     \n"
         "ldmia   %[v1],  {r0-r3}     \n"
         "mov     r5, r5, ror #16     \n"
-        "pkhbt   r4, r4, r5          \n"
+        "pkhtb   r4, r5, r4, asr #16 \n"
         "sadd16  r0, r0, r4          \n"
-        "mov     r6, r6, ror #16     \n"
-        "pkhbt   r5, r5, r6          \n"
+        "pkhbt   r5, r5, r6, lsl #16 \n"
(Continue reading)


Gmane