%include "Tools.inc"

segment_code

;
; void Adapt ( short* pM, const short* pAdapt, int nDirection, int nOrder )
;
;   [esp+16] nOrder
;   [esp+12] nDirection
;   [esp+ 8] pAdapt
;   [esp+ 4] pM
;   [esp+ 0] Return Address
;
; Element-wise update of the 16-bit words at pM with the words at pAdapt:
;   nDirection >  0 :  pM[i] += pAdapt[i]   (paddw, wraps on overflow)
;   nDirection <  0 :  pM[i] -= pAdapt[i]   (psubw, wraps on overflow)
;   nDirection == 0 :  pM is left untouched
;
; Work is done in blocks of 16 words (32 bytes), nOrder / 16 blocks in
; total; a remainder of nOrder % 16 words is not processed.  When
; nOrder / 16 is zero the routine returns without touching memory
; (previously the block counter wrapped around and the loop ran past
; the end of both buffers).
;
; The nops after each "align 16" are padding, not executed code: they
; are sized so that the loop label that follows lands on a 16-byte
; boundary.  This assumes the proc macro emits no code of its own --
; TODO confirm against Tools.inc.
;

            align 16
            nop                                 ; 4 pad bytes + 28 bytes of
            nop                                 ; prologue = 32, so that
            nop                                 ; AdaptAddLoop is 16-byte
            nop                                 ; aligned
proc        Adapt

            mov     eax, [esp +  4]             ; pM
            mov     ecx, [esp +  8]             ; pAdapt
            mov     edx, [esp + 16]             ; nOrder
            shr     edx, 4                      ; edx = number of 16-word blocks
            jz      near AdaptDone              ; no complete block: nothing to do

            cmp     dword [esp + 12], byte 0    ; nDirection
            jle     short AdaptSub              ; <= 0: subtract (or nothing if == 0)

AdaptAddLoop:
            movq    mm0, [eax]
            paddw   mm0, [ecx]
            movq    [eax], mm0
            movq    mm1, [eax + 8]
            paddw   mm1, [ecx + 8]
            movq    [eax + 8], mm1
            movq    mm2, [eax + 16]
            paddw   mm2, [ecx + 16]
            movq    [eax + 16], mm2
            movq    mm3, [eax + 24]
            paddw   mm3, [ecx + 24]
            movq    [eax + 24], mm3
            add     eax, byte 32                ; advance both pointers by one block
            add     ecx, byte 32
            dec     edx
            jnz     AdaptAddLoop

            emms                                ; leave MMX state before returning
            ret

            align 16
            nop                                 ; 14 pad bytes + 2 bytes for the
            nop                                 ; "je short" below = 16, so that
            nop                                 ; AdaptSubLoop is 16-byte aligned
            nop
            nop
            nop
            nop
            nop
            nop
            nop
            nop
            nop
            nop
            nop

AdaptSub:   je      short AdaptDone             ; flags still from the cmp above:
                                                ; nDirection == 0 -> nothing to do

AdaptSubLoop:
            movq    mm0, [eax]
            psubw   mm0, [ecx]
            movq    [eax], mm0
            movq    mm1, [eax + 8]
            psubw   mm1, [ecx + 8]
            movq    [eax + 8], mm1
            movq    mm2, [eax + 16]
            psubw   mm2, [ecx + 16]
            movq    [eax + 16], mm2
            movq    mm3, [eax + 24]
            psubw   mm3, [ecx + 24]
            movq    [eax + 24], mm3
            add     eax, byte 32                ; advance both pointers by one block
            add     ecx, byte 32
            dec     edx
            jnz     AdaptSubLoop

            emms                                ; leave MMX state before returning
AdaptDone:
                                                ; no explicit ret here: endproc
                                                ; (Tools.inc) is relied on to
                                                ; emit the return
endproc

;
; int CalculateDotProduct ( const short* pA, const short* pB, int nOrder )
;
;   [esp+12] nOrder
;   [esp+ 8] pB
;   [esp+ 4] pA
;   [esp+ 0] Return Address
;
; Returns in eax the sum of pA[i] * pB[i] over the first
; (nOrder / 16) * 16 words.  pmaddwd forms signed 16x16 products and
; adds adjacent pairs into two 32-bit lanes; the lanes are accumulated
; with paddd (wraps on overflow) and added together at the end.
; A remainder of nOrder % 16 words is ignored.  When nOrder / 16 is zero
; the result is 0 (previously the block counter wrapped around and the
; loop ran past the end of both buffers).
;
; The pA argument slot at [esp+4] is reused as scratch space to move
; the result from mm6 to eax.
;

            align 16
            nop                                 ; 12 pad bytes + 20 bytes of
            nop                                 ; prologue = 32, so that loopDot
            nop                                 ; is 16-byte aligned (assuming
            nop                                 ; proc emits no code -- TODO
            nop                                 ; confirm against Tools.inc)
            nop
            nop
            nop
            nop
            nop
            nop
            nop

proc        CalculateDotProduct

            mov     eax, [esp +  4]             ; pA
            mov     ecx, [esp +  8]             ; pB
            mov     edx, [esp + 12]             ; nOrder
            pxor    mm7, mm7                    ; accumulator = 0
            shr     edx, 4                      ; edx = number of 16-word blocks
            jz      short dotReduce             ; no complete block: result is 0

loopDot:    movq    mm0, [eax]
            pmaddwd mm0, [ecx]
            paddd   mm7, mm0
            movq    mm1, [eax + 8]
            pmaddwd mm1, [ecx + 8]
            paddd   mm7, mm1
            movq    mm2, [eax + 16]
            pmaddwd mm2, [ecx + 16]
            paddd   mm7, mm2
            movq    mm3, [eax + 24]
            pmaddwd mm3, [ecx + 24]
            add     eax, byte 32                ; advance both pointers by one block
            add     ecx, byte 32
            paddd   mm7, mm3
            dec     edx
            jnz     loopDot

dotReduce:
            movq    mm6, mm7                    ; add the high dword lane of the
            psrlq   mm7, 32                     ; accumulator onto the low one
            paddd   mm6, mm7
            movd    [esp + 4], mm6              ; spill through the pA argument slot
            emms                                ; leave MMX state before returning
            mov     eax, [esp + 4]              ; return value
endproc


;
; BOOL GetMMXAvailable ( void );
;
; Returns 1 in eax when the CPU reports MMX support, 0 otherwise.
; First checks whether bit 21 of EFLAGS (0x200000, the ID flag) can be
; toggled; if it cannot, CPUID is not available and the answer is 0.
; Otherwise runs CPUID with eax = 1 and tests bit 23 of edx (0x800000).
; All general registers are preserved with pushad/popad; eax is then
; rebuilt from the zero flag, which popad does not modify.
; The modified EFLAGS value written by popfd is not restored afterwards.
;

proc        GetMMXAvailable
            pushad
            pushfd
            pop     eax                         ; eax = current EFLAGS
            mov     ecx, eax                    ; keep a copy for comparison
            xor     eax, 0x200000               ; flip the ID flag
            push    eax
            popfd                               ; try to write it back
            pushfd
            pop     eax                         ; read EFLAGS again
            cmp     eax, ecx
            jz      short return                ; no CPUID command, so no MMX
                                                ; (ZF = 1 here -> result 0)

            mov     eax, 1
            CPUID
            test    edx, 0x800000               ; ZF = 0 when the MMX bit is set
return:     popad                               ; restores registers, keeps flags
            setnz   al                          ; al = 1 if MMX bit was set
            and     eax, byte 1                 ; clear the remaining bits of eax
endproc

            end