Listing 2: Fast MMX register copy


_asm
{
 // Copy dwByteCount bytes from pData to pDest through MMX register mm0.
 //
 //   Phase 1 (PreWarm):  read one byte from every 32-byte step of the
 //                       source, 256 bytes per iteration, before any
 //                       data is copied (presumably one touch per cache
 //                       line -- confirm for the target CPU).
 //   Phase 2 (LoopCopy): copy 8 bytes at a time, walking from the end of
 //                       the buffer down to offset 0.
 //   Phase 3 (TailLoop): copy the leading (dwByteCount & 7) bytes that
 //                       the 8-byte loop cannot reach.
 //
 // The byte count is compared as a signed value throughout, so a count
 // that is zero or negative when read as signed copies nothing.
 // Registers used: al, bl, ecx, esi, edi, mm0, flags.

 mov esi, pData              ; esi = source base
 mov ecx, dwByteCount
 mov edi, pDest              ; edi = destination base
 sub ecx, 256                ; ecx >= 0  <=>  a full 256-byte block remains
 jl DonePreWarm              ; fewer than 256 bytes: nothing to pre-warm

ALIGN 16
PreWarm:
 // Pre-warm the read buffer: the loaded values are discarded, only the
 // memory reads matter. Loads alternate between al and bl.
 mov al, [esi]
 mov bl, [esi+32]
 mov al, [esi+64]
 mov bl, [esi+96]
 mov al, [esi+128]
 mov bl, [esi+160]
 mov al, [esi+192]
 mov bl, [esi+224]

 // The nop will force the code
 // to pair better.
 add esi, 256                ; advance to the next 256-byte block
  nop

 sub ecx, 256
  jge PreWarm                ; jge, not jg: ecx == 0 still means one more
                             ; full 256-byte block is left to touch

DonePreWarm:
 mov ecx, dwByteCount        ; restart from the full count
 mov esi, pData              ; esi was advanced by the pre-warm loop
 sub ecx, 8                  ; ecx = offset of the last whole qword
 jl CopyTail                 ; fewer than 8 bytes: no whole qword to copy

ALIGN 16
LoopCopy:
 movq mm0, [esi+ecx]         ; movq is required for 64-bit MMX moves
 movq [edi+ecx], mm0
 sub ecx, 8
  jge LoopCopy               ; jge, not jg: offset 0 must be copied too

CopyTail:
 // Bytes [0, dwByteCount & 7) are still uncopied when the count is not a
 // multiple of 8, because LoopCopy works downward from the end.
 mov ecx, dwByteCount
 test ecx, ecx
 jle DoneCopy                ; zero or negative (signed) count: copy nothing
 and ecx, 7                  ; ecx = number of leading bytes left
 jz DoneCopy

TailLoop:
 mov al, [esi+ecx-1]
 mov [edi+ecx-1], al
 dec ecx
  jnz TailLoop

DoneCopy:
 emms                        ; leave MMX state so x87 code can run afterwards
}
: End of File