Listing 2: Fast MMX register copy
// Fast MMX copy of dwByteCount bytes from pData to pDest.
//
// Phase 1 (PreWarm): reads one byte every 32 bytes across each whole
//   256-byte block of the source so the data is already in the cache
//   before the copy starts. Nothing is written in this phase.
// Phase 2 (LoopCopy): copies 8 bytes per iteration through mm0, walking
//   from the highest offset down to offset 0.
//
// Inputs   : pData (source), pDest (destination), dwByteCount (bytes).
// Clobbers : al, bl, ecx, esi, edi, mm0, flags. emms is executed on exit so
//            the x87 FPU is usable again afterwards.
// NOTE(review): only whole 8-byte groups are copied. If dwByteCount is not
//   a multiple of 8, the first (dwByteCount mod 8) bytes of the buffer are
//   left uncopied, and a count below 8 copies nothing - confirm callers
//   always pass a multiple of 8 or handle the remainder themselves.
// The ;N annotations are the clock counts carried over from the original
// listing.
_asm
{
    mov     esi, pData
    mov     ecx, dwByteCount
    mov     edi, pDest

    // ecx = bytes remaining beyond the 256-byte block at esi.
    // Skip the pre-warm entirely when there is less than one whole block.
    sub     ecx, 256
    jl      DonePreWarm

    ALIGN 16
PreWarm:
    // Pre-warm the read buffer: touch the block at 32-byte intervals.
                                ;clocks
    mov     al, [esi]           ;1
    mov     bl, [esi+32]        ;0
    mov     al, [esi+64]        ;1
    mov     bl, [esi+96]        ;0
    mov     al, [esi+128]       ;1
    mov     bl, [esi+160]       ;0
    mov     al, [esi+192]       ;1
    mov     bl, [esi+224]       ;0
    // The nop will force the code to pair better.
    add     esi, 256            ;1
    nop                         ;0
    sub     ecx, 256            ;1
    // jge, not jg: ecx == 0 means exactly one more whole block remains,
    // and it must be warmed too.
    jge     PreWarm             ;0

DonePreWarm:
    // Reload the count and source pointer; the pre-warm loop advanced esi.
    mov     ecx, dwByteCount
    mov     esi, pData

    // ecx = offset of the last 8-byte group. Negative means fewer than
    // 8 bytes were requested, so there is nothing to copy.
    sub     ecx, 8
    jl      DoneCopy

    ALIGN 16
LoopCopy:
    // movq is required for 64-bit MMX moves; plain mov cannot encode mm0.
    movq    mm0, [esi+ecx]      ;1
    movq    [edi+ecx], mm0      ;1
    sub     ecx, 8              ;1
    // jge, not jg: the group at offset 0 must be copied as well.
    jge     LoopCopy            ;0

DoneCopy:
    // Leave MMX state so later floating-point code works.
    emms
}
: End of File