/////////////////////////////////////////////////////////////////////////////// // // ## ###### // ###### ### // ## ############### Shark 3D Engine (www.shark3d.com) // ########## # # # // ######## Copyright (c) 1996-2006 Spinor GmbH. // ######### # # # All rights reserved. // ## ########## // ## // /////////////////////////////////////////////////////////////////////////////// //@cpp #ifndef S3D_DRV_D3D9_MMX_H #define S3D_DRV_D3D9_MMX_H #include "general/general_env.h" #include "sys/core/sys_types.h" #include "sys/core/sys_assert.h" #include "sys/core/sys_lang.h" #include #include /////////////////////////////////////////////////////////////////////////////// #ifdef S3D_DRV_D3D9_MMX_USEPROLOGUE #define S3D_DRV_D3D9_MMX_PROLOGUE \ push esi \ push edi \ push ecx #define S3D_DRV_D3D9_MMX_EPILOGUE \ pop ecx \ pop edi \ pop esi #else #define S3D_DRV_D3D9_MMX_PROLOGUE #define S3D_DRV_D3D9_MMX_EPILOGUE #endif //S3D_DRV_D3D9_MMX_USEPROLOGUE /////////////////////////////////////////////////////////////////////////////// //@ void s3d_DrvD3d9MemcpyMmx8(void *Dest, const void *Src, s3d_CSysIntps Len); //@ void s3d_DrvD3d9MemcpyMmx16(void *Dest, const void *Src, s3d_CSysIntps Len); //@ void s3d_DrvD3d9MemcpyMmx32(void *Dest, const void *Src, s3d_CSysIntps Len); //@ void s3d_DrvD3d9MemcpyMmx64(void *Dest, const void *Src, s3d_CSysIntps Len); //@ void s3d_DrvD3d9MemfillMmx64(void *Dest, int Val, s3d_CSysIntps Len); /////////////////////////////////////////////////////////////////////////////// S3D_SYS_INLINE D3DX_ALIGN16 void s3d_DrvD3d9MemcpyMmx8(void *Dest, const void *Src, s3d_CSysIntps Len) { S3D_SYS_ASSERT((Len % 8) == 0); __asm { S3D_DRV_D3D9_MMX_PROLOGUE mov esi, Src mov edi, Dest mov ecx, Len shr ecx, 3 // 8 bytes per iteration loop1: //prefetchnta 8[ESI] // Prefetch next loop, non-temporal movq mm1, 0[ESI] // Read in source data movntq 0[EDI], mm1 // Non-temporal stores add esi, 8 add edi, 8 dec ecx jnz loop1 emms S3D_DRV_D3D9_MMX_EPILOGUE } } S3D_SYS_INLINE D3DX_ALIGN16 void s3d_DrvD3d9MemcpyMmx16(void *Dest, const void *Src, s3d_CSysIntps Len) { S3D_SYS_ASSERT((Len % 16) == 0); __asm { S3D_DRV_D3D9_MMX_PROLOGUE mov esi, Src mov edi, Dest mov ecx, Len shr ecx, 4 // 16 bytes per iteration loop1: //prefetchnta 16[ESI] // Prefetch next loop, non-temporal movq mm1, 0[ESI] // Read in source data movq mm2, 8[ESI] movntq 0[EDI], mm1 // Non-temporal stores movntq 8[EDI], mm2 add esi, 16 add edi, 16 dec ecx jnz loop1 emms S3D_DRV_D3D9_MMX_EPILOGUE } } S3D_SYS_INLINE D3DX_ALIGN16 void s3d_DrvD3d9MemcpyMmx32(void *Dest, const void *Src, s3d_CSysIntps Len) { S3D_SYS_ASSERT((Len % 32) == 0); __asm { S3D_DRV_D3D9_MMX_PROLOGUE mov esi, Src mov edi, Dest mov ecx, Len shr ecx, 5 // 32 bytes per iteration loop1: //prefetchnta 32[ESI] // Prefetch next loop, non-temporal movq mm1, 0[ESI] // Read in source data movq mm2, 8[ESI] movq mm3, 16[ESI] movq mm4, 24[ESI] movntq 0[EDI], mm1 // Non-temporal stores movntq 8[EDI], mm2 movntq 16[EDI], mm3 movntq 24[EDI], mm4 add esi, 32 add edi, 32 dec ecx jnz loop1 emms S3D_DRV_D3D9_MMX_EPILOGUE } } S3D_SYS_INLINE D3DX_ALIGN16 void s3d_DrvD3d9MemcpyMmx64(void *Dest, const void *Src, s3d_CSysIntps Len) { S3D_SYS_ASSERT((Len % 64) == 0); __asm { S3D_DRV_D3D9_MMX_PROLOGUE mov esi, Src mov edi, Dest mov ecx, Len shr ecx, 6 // 64 bytes per iteration loop1: //prefetchnta 64[ESI] // Prefetch next loop, non-temporal movq mm1, 0[ESI] // Read in source data movq mm2, 8[ESI] movq mm3, 16[ESI] movq mm4, 24[ESI] movq mm5, 32[ESI] movq mm6, 40[ESI] movq mm7, 48[ESI] movq mm0, 56[ESI] movntq 0[EDI], mm1 // Non-temporal stores movntq 8[EDI], mm2 movntq 16[EDI], mm3 movntq 24[EDI], mm4 movntq 32[EDI], mm5 movntq 40[EDI], mm6 movntq 48[EDI], mm7 movntq 56[EDI], mm0 add esi, 64 add edi, 64 dec ecx jnz loop1 emms S3D_DRV_D3D9_MMX_EPILOGUE } } /////////////////////////////////////////////////////////////////////////////// S3D_SYS_INLINE D3DX_ALIGN16 void s3d_DrvD3d9MemfillMmx64(void *Dest, int Val, s3d_CSysIntps Len) { S3D_SYS_ASSERT(((UINT_PTR)Dest & 0xF) == 0); S3D_SYS_ASSERT((Len % 64) == 0); __m64 Val64; Val64.m64_i32[0] = Val; Val64.m64_i32[1] = Val; __asm { S3D_DRV_D3D9_MMX_PROLOGUE mov edi, Dest mov ecx, Len shr ecx, 6 // 64 bytes per iteration movq mm1, Val64 // Read in source data loop1: //prefetchnta 64[ESI] // Prefetch next loop, non-temporal movntq 0[EDI], mm1 // Non-temporal stores movntq 8[EDI], mm1 movntq 16[EDI], mm1 movntq 24[EDI], mm1 movntq 32[EDI], mm1 movntq 40[EDI], mm1 movntq 48[EDI], mm1 movntq 56[EDI], mm1 add edi, 64 dec ecx jnz loop1 emms S3D_DRV_D3D9_MMX_EPILOGUE } } /////////////////////////////////////////////////////////////////////////////// #endif