00001
00007 #include "memory.h"
00008 #include "malloc.h"
00009
/*
 * Round `size` down to the nearest multiple of `align_offset`.
 *
 * size         : byte count to round.
 * align_offset : alignment unit in bytes. 0 means "no alignment" and
 *                returns `size` unchanged (the previous loop divided by
 *                zero in that case).
 * Returns the largest multiple of `align_offset` that is <= `size`
 * (0 when `size` is smaller than `align_offset`).
 */
DKC_INLINE size_t dkcstd_size_align(size_t size,size_t align_offset){
	if(0 == align_offset){
		return size;
	}
	/* Same result as counting down one byte at a time, in O(1). */
	return size - (size % align_offset);
}
00020
00021
00022 void dkcstd_get_memory_align(DKCSTD_ALIGNED_MEM *r,const void *src,size_t srcsize,size_t align_offset)
00023 {
00024 size_t o = dkcstd_offset_align(src,align_offset);
00025 BYTE *t = (BYTE *)src;
00026 size_t m = srcsize - o;
00027 r->access_permit_from_mem = dkcstd_size_align(m,align_offset);
00028
00029 r->mem = t + o;
00030 r->offset_from_original = o;
00031 }
00032
00033
00034
00035 DKC_INLINE void *dkcstd_pointer_align( const void *src,size_t align_offset){
00036 return ((uint8 *)src) + dkcstd_offset_align(src,align_offset);
00037 }
00038
/*
 * Number of bytes that must be added to the address `src` to reach the
 * next multiple of `align_offset`.
 *
 * src          : address to align (only its numeric value is used).
 * align_offset : alignment in bytes. 0 means "no alignment" and yields 0
 *                (the previous loop divided by zero).
 * Returns a value in [0, align_offset - 1]; 0 when `src` is already
 * aligned.
 *
 * The previous implementation incremented the address one byte at a time
 * and, on WIN32, gave up as soon as the address reached 0x80000000, which
 * returned a wrong offset for every unaligned address in the upper half
 * of the address space.  The closed form below has neither problem.
 */
DKC_INLINE size_t dkcstd_offset_align(const void *src,size_t align_offset){
	size_t addr = (size_t)src;
	size_t rem;

	if(0 == align_offset){
		return 0;
	}
	rem = addr % align_offset;
	return (0 == rem) ? 0 : (align_offset - rem);
}
00058 BOOL dkcstd_IsAligned(const void *src,size_t bytes)
00059 {
00060 size_t force = (size_t)src;
00061 return (force % bytes == 0);
00062 }
00063
/*
 * Copy `count` bytes from `src` to `dest`, one byte at a time, in
 * ascending address order.  Returns `dest`.
 */
void *dkcstd_memcpy8( void *dest, const void *src, size_t count ){
	unsigned char *out = (unsigned char *)dest;
	const unsigned char *in = (const unsigned char *)src;
	size_t left = count;

	while(left != 0){
		*out++ = *in++;
		left--;
	}
	return dest;
}
00077
00078
00079
/*
 * Fill `count` bytes starting at `dest` with the low 8 bits of `c`,
 * one byte at a time.  Returns `dest`.
 */
void *dkcstd_memset8( void *dest, int c, size_t count ){
	unsigned char *out = (unsigned char *)dest;
	const unsigned char fill = (unsigned char)c;
	size_t left;

	for(left = count; left != 0; left--){
		*out++ = fill;
	}
	return dest;
}
00088
/*
 * Compare the first `count` bytes of `buf1` and `buf2`.
 *
 * Returns 0 when all bytes are equal; otherwise the difference between
 * the first pair of differing bytes, both taken as unsigned values
 * (negative when buf1 sorts first, positive when buf2 sorts first).
 *
 * The previous version re-read the mismatching position through `int *`,
 * which read past the end of the buffers, could be misaligned, and
 * produced a result unrelated to byte order.
 */
int dkcstd_memcmp8(const void *buf1, const void *buf2, size_t count ){
	const unsigned char *a = (const unsigned char *)buf1;
	const unsigned char *b = (const unsigned char *)buf2;
	size_t i;

	for(i=0;i<count;i++){
		if(a[i] != b[i]){
			return (int)a[i] - (int)b[i];
		}
	}
	return 0;
}
00110
00111
/*
 * Search the first `count` bytes of `buf` for the byte value
 * (unsigned char)c.  Returns a pointer to the first match, or NULL when
 * the value does not occur.
 */
void *dkcstd_memchr8( const void *buf, int c, size_t count )
{
	const unsigned char *cur = (const unsigned char *)buf;
	const unsigned char wanted = (unsigned char)c;
	size_t left = count;

	while(left != 0){
		if(*cur == wanted){
			return (void *)cur;
		}
		cur++;
		left--;
	}
	return NULL;
}
00123
00124 #if 1==DKUTIL_CSTD_MSVC_IASM
00125 #pragma warning( disable : 4035 ) // C4035 "no return value": the inline-asm functions below leave their result in EAX
00126
00127
/*
 * Early-out for small copies: when `count` is at most
 * DKUTIL_BIT_TO_BYTE(limit) bytes, return dkcstd_memcpy(dest,src,count)
 * from the enclosing function.  Expects `dest`, `src` and `count` to be
 * in scope.  Wrapped in do/while(0) so that `MACRO(n);` is a single
 * statement and cannot capture a following `else`.
 */
#define DKCSTD_MEMCPY_FILTER(limit) \
	do{ \
		if(count <= DKUTIL_BIT_TO_BYTE(limit)) return dkcstd_memcpy(dest,src,count); \
	}while(0)
00131
/*
 * Early-out for copies the aligned fast path cannot take: unless `src`
 * and `dest` are both multiples of `limit` and `count` is a multiple of
 * `limit`, return dkcstd_memcpy(dest,src,count) from the enclosing
 * function.  Note that `limit` is used here as a byte count, not a bit
 * count.  Expects `dest`, `src` and `count` to be in scope.
 */
#define DKCSTD_MEMCPY_ALIGN_FILTER(limit) \
	do{ \
		if(!dkcstd_IsAligned(src,(limit)) \
			|| !dkcstd_IsAligned(dest,(limit)) \
			|| (count % (limit)) != 0) \
			return dkcstd_memcpy(dest,src,count); \
	}while(0)
00139
00140 void *dkcstd_memcpy_rep_movsd( void *dest, const void *src, size_t count ){
00141 _asm {
00142 mov edi,dest ;edi = dest
00143 mov esi,src ;esi = src
00144 mov ecx,count ;ecx = count
00145 ;mov eax,03h ; %4に使うbitmask
00146 mov edx,ecx ;edx = ecx
00147 shr ecx,2 ;ecx / 4
00148 rep movsd ;ecxが無くなるまでmemcpy見たいナもの
00149 ;MiniCopy:
00150 and edx, 03h ;edx & eax ( count % 4 )
00151 mov ecx, edx ;
00152 rep movsb ;N BYTE MOVE
00153 cld
00154 mov eax,[dest] ;V return dst
00155 }
00156
00157 }
00158
00159
/*
 * memcpy variant that hands the work to optimized_memcpy_fpu_8bytes().
 *
 * Copies rejected by either filter macro are delegated to dkcstd_memcpy():
 * small ones (DKCSTD_MEMCPY_FILTER) and those whose pointers or length are
 * not multiples of 64 (DKCSTD_MEMCPY_ALIGN_FILTER).
 * NOTE(review): the alignment filter takes 64 as a byte count while the
 * size filter takes it as a bit count - confirm that is intended.
 * Returns `dest`.
 */
void *dkcstd_memcpy_FPU64( void *dest, const void *src, size_t count )
{
	DKCSTD_MEMCPY_FILTER(64);
	DKCSTD_MEMCPY_ALIGN_FILTER(64);

	optimized_memcpy_fpu_8bytes(dest,src,count);
	return dest;
}
00197
00198 void *dkcstd_memcpy_asm64( void *dest, const void *src, size_t count ){
00199
00200 DKCSTD_MEMCPY_FILTER(64);
00201
00202 _asm {
00203 mov edi,dest;
00204 mov esi,src;
00205 mov ecx,count;
00206 mov edx,ecx;
00207 shr ecx,3;
00208 lx:
00209 mov eax,[esi];
00210 mov ebx,[esi+4];
00211 add esi,8;
00212 mov [edi],eax;
00213 mov [edi+4],ebx;
00214 add edi,8;
00215 dec ecx;
00216 jnz lx;
00217 ;MiniCopy:
00218 mov ecx,edx ;
00219 and ecx, 7 ;edx & 7 ( count % 8 )
00220 rep movsb ;N BYTE MOVE
00221 cld
00222
00223 }
00224 return dest;
00225
00226 }
00227
00228
00229 void *dkcstd_memcpy_asm32( void *dest, const void *src, size_t count ){
00230
00231
00232
00233 DKCSTD_MEMCPY_FILTER(32);
00234 _asm {
00235 mov edi,dest;
00236 mov esi,src;
00237 mov ecx,count;
00238 mov ebx,ecx;
00239 shr ecx,2;
00240 lx:
00241 mov eax,[esi];
00242 add esi,4;
00243 mov [edi],eax;
00244 add edi,4;
00245 dec ecx;
00246 jnz lx;
00247 ;MiniCopy:
00248 mov ecx,ebx ;
00249 and ecx, 3 ; ( count % 4 )
00250 rep movsb ;N BYTE MOVE
00251 cld
00252 mov eax,[dest] ;V return dest
00253 }
00254
00255
00256 }
00257
00258
00259 DKC_EXTERN void *dkcstd_memcpy_movntq_mmx_prefetch128( void *dest, const void *src, size_t count )
00260 {
00261
00262 DKCSTD_MEMCPY_FILTER(128);
00263 _asm {
00264 mov edi,dest;
00265 mov esi,src;
00266 mov ecx,count;
00267 mov eax,ecx;
00268 shr ecx,4;
00269 lx:
00270 movq mm0,[esi];
00271 movq mm1,[esi+8];
00272 lea esi,[esi+16];
00273 movntq [edi],mm0;
00274 prefetcht0 [esi+768];
00275 movntq [edi+8],mm1;
00276 lea edi,[edi+16];
00277 dec ecx;
00278 jnz lx;
00279 ;MiniCopy:
00280 mov ecx,eax ;
00281 and ecx, 15 ;edx & 15 ( count % 16 )
00282 rep movsb ;N BYTE MOVE
00283 cld
00284 mov eax,[dest] ;V return dst
00285 }
00286
00287
00288 }
00289 void *dkcstd_memcpy_movntq_mmx128( void *dest, const void *src, size_t count ){
00290 DKCSTD_MEMCPY_FILTER(128);
00291
00292 _asm {
00293 mov edi,dest;
00294 mov esi,src;
00295 mov ecx,count;
00296 mov eax,ecx
00297 shr ecx,4;
00298 lx:
00299 movq mm0,[esi];
00300 movq mm1,[esi+8];
00301 lea esi,[esi+16];
00302 movntq [edi],mm0;
00303 movntq [edi+8],mm1;
00304 lea edi,[edi+16];
00305 dec ecx;
00306 jnz lx;
00307 ;MiniCopy:
00308 mov ecx,eax ;
00309 and ecx, 15 ;edx & 15 ( count % 16 )
00310 rep movsb ;N BYTE MOVE
00311 cld
00312 mov eax,[dest] ;V return dst
00313 }
00314
00315 }
00316
00317 void *dkcstd_memcpy_mmx128( void *dest, const void *src, size_t count ){
00318 DKCSTD_MEMCPY_FILTER(128);
00319 _asm {
00320 mov edi,dest;
00321 mov esi,src;
00322 mov ecx,count;
00323 mov eax,ecx;
00324
00325
00326
00327 shr ecx,4;
00328 lx:
00329 movq mm0,[esi];
00330 movq mm1,[esi+8];
00331 lea esi,[esi+16];
00332 movq [edi],mm0;
00333 movq [edi+8],mm1;
00334 lea edi,[edi+16];
00335 dec ecx;
00336 jnz lx;
00337 MiniCopy:
00338 sub eax,10h
00339 mov ecx,eax ;
00340 and ecx, 15 ;edx & 15 ( count % 16 )
00341 rep movsb ;N BYTE MOVE
00342 cld
00343 mov eax,[dest] ;V return dest
00344 }
00345
00346
00347 }
00348
00349 void *dkcstd_memcpy_movntps_sse128( void *dest, const void *src, size_t count ){
00350 DKCSTD_MEMCPY_FILTER(128);
00351 _asm {
00352 mov edi,[dest];
00353 mov esi,[src];
00354 mov ecx,count;
00355 mov eax,ecx;
00356 shr ecx,4;
00357 lx:
00358 movaps xmm0,[esi];
00359 lea esi,[esi+16];
00360 movntps [edi],xmm0;
00361 lea edi,[edi+16];
00362 dec ecx;
00363 jnz lx;
00364 ;MiniCopy:
00365 mov ecx,eax ;
00366 and ecx, 15 ;edx & 15 ( count % 16 )
00367 rep movsb ;N BYTE MOVE
00368 cld
00369 mov eax,[dest] ;V return dest
00370 }
00371
00372 }
00373
00374 void *dkcstd_memcpy_sse128( void *dest, const void *src, size_t count ){
00375 DKCSTD_MEMCPY_FILTER(128);
00376 _asm {
00377 mov edi,dest;
00378 mov esi,src;
00379 mov ecx,count;
00380 mov eax,ecx;
00381 shr ecx,4;
00382 lx:
00383 movaps xmm0,[esi];
00384 lea esi,[esi+16];
00385 movaps [edi],xmm0;
00386 lea edi,[edi+16];
00387 dec ecx;
00388 jnz lx;
00389 ;MiniCopy:
00390 mov ecx,eax ;
00391 and ecx, 15 ;edx & 15 ( count % 16 )
00392 rep movsb ;N BYTE MOVE
00393 cld
00394 mov eax,[dest] ;V return dest
00395 }
00396 }
00397 void *dkcstd_memcpy_movntq_mmx64( void *dest, const void *src, size_t count ){
00398 DKCSTD_MEMCPY_FILTER(64);
00399 _asm {
00400 mov edi,dest;
00401 mov esi,src;
00402 mov ecx,count;
00403 mov eax,ecx;
00404 shr ecx,3;
00405 lx:
00406 movq mm0,[esi];
00407 lea esi,[esi+8];
00408 movntq [edi],mm0;
00409 lea edi,[edi+8];
00410 dec ecx;
00411 jnz lx;
00412 ;MiniCopy:
00413 mov ecx,eax ;
00414 and ecx, 7 ;( count % 16 )
00415 rep movsb ;N BYTE MOVE
00416 cld
00417 mov eax,[dest] ;V return dst
00418 }
00419
00420 }
00421
00422
00423 void *dkcstd_memcpy_mmx64( void *dest, const void *src, size_t count )
00424 {
00425 DKCSTD_MEMCPY_FILTER(64);
00426 _asm {
00427 mov edi,dest;
00428 mov esi,src;
00429 mov ecx,count;
00430 mov edx,ecx;
00431
00432
00433
00434 shr ecx,3;
00435 lx:
00436 movq mm0,[esi];
00437 add esi,8;
00438 movq [edi],mm0;
00439 add edi,8;
00440 dec ecx;
00441 jnz lx;
00442 MiniCopy:
00443 mov ecx,edx ;
00444 and ecx, 7 ;( count % 8 )
00445 rep movsb ;N BYTE MOVE
00446 cld
00447 mov eax,[dest] ;V return dest
00448 }
00449 }
00450
00451 void *dkcstd_memcpy_movups_sse_prefetch256( void *dest, const void *src, size_t count )
00452 {
00453 DKCSTD_MEMCPY_FILTER(256);
00454 _asm {
00455 mov edi,dest;
00456 mov esi,src;
00457 mov ecx,count;
00458 mov eax,ecx;
00459 shr ecx,5;
00460 lx:
00461 movups xmm0,[esi];
00462 movups xmm1,[esi+16];
00463 lea esi,[esi+32];
00464 movups [edi],xmm0;
00465 prefetcht0 [esi+1024];
00466 movups [edi+16],xmm1;
00467 lea edi,[edi+32];
00468 dec ecx;
00469 jnz lx;
00470 ;MiniCopy:
00471 mov ecx,eax ;
00472 and ecx, 31 ;( count % 32 )
00473 rep movsb ;N BYTE MOVE
00474 cld
00475 mov eax,[dest] ;V return dest
00476 }
00477
00478
00479 }
00480
00481
00482 void *dkcstd_memcpy_movntps_sse_prefetch256( void *dest, const void *src, size_t count )
00483 {
00484 DKCSTD_MEMCPY_FILTER(256);
00485 _asm {
00486 mov edi,dest;
00487 mov esi,src;
00488 mov ecx,count;
00489 mov eax,ecx;
00490 shr ecx,5;
00491 lx:
00492 movaps xmm0,[esi];
00493 movaps xmm1,[esi+16];
00494 lea esi,[esi+32];
00495 movntps [edi],xmm0;
00496 prefetcht0 [esi+1024];
00497 movntps [edi+16],xmm1;
00498 lea edi,[edi+32];
00499 dec ecx;
00500 jnz lx;
00501 ;MiniCopy:
00502 mov ecx,eax ;
00503 and ecx, 31 ;( count % 32 )
00504 rep movsb ;N BYTE MOVE
00505 cld
00506 mov eax,[dest] ;V return dest
00507 }
00508
00509
00510 }
00511
00512
00513
00514
/*
 * Overlap-safe copy of `cnt` bytes from `src` to `dst`, written in MSVC
 * x86 inline assembly.
 *
 * Direction selection:
 *   - dst <= src, or dst >= src + cnt : copy upward (CopyUp);
 *   - src < dst < src + cnt           : copy downward (CopyDown), with the
 *                                       direction flag set by `std` and
 *                                       both pointers moved to the last
 *                                       byte of their ranges.
 *
 * Both paths share one shape: copy single bytes with `rep movsb` until
 * the destination pointer is dword-aligned, copy whole dwords with
 * `rep movsd`, then fall into MiniCopy, which copies the remaining bytes
 * (held in edx) with `rep movsb`.  Short copies (cnt <= 3 upward,
 * cnt <= 4 downward) jump straight to MiniCopy.  In the downward path
 * esi/edi are lowered by 3 around `rep movsd` so that it addresses whole
 * dwords, and raised again afterwards for the byte tail.
 *
 * The direction flag is cleared (`cld`) only at the very end; no `cld`
 * is executed before the upward copy, so that path presumably relies on
 * the flag already being clear on entry - TODO confirm against the
 * calling convention in use.
 *
 * The function has no C `return`; the result (`dst`) is left in EAX by
 * the final `mov eax, [dst]`.
 *
 * The ;U / ;V / ;N tags in the instruction comments are not explained in
 * this file; they presumably mark Pentium U-pipe / V-pipe /
 * non-pairable instructions - unverified.
 */
00516 void* dkcstd_memmove( void* dst, const void* src, size_t cnt )
00517 {
00518 __asm {
00519 mov esi, [src] ;U esi = const void* src
00520 mov edx, [cnt] ;V edx = void* cnt
00521 mov edi, [dst] ;U edi = ulong dst
00522 mov ebx, edx ;V
00523 mov eax, 03h ;U eax = const ulong 3 (for masking)
00524 add ebx, esi ;V ebx = const void* src+cnt
00525
00526 cmp edi, esi ;
00527 jbe CopyUp ;
00528 cmp edi, ebx ; if( src < dst < src+cnt )
00529 jb CopyDown ; downward copy
00530
00531 CopyUp:
00532 cmp edx, eax ; if( cnt<=3 )
00533 jbe MiniCopy ; byte by byte copy
00534
00535 mov ebx, edi ;U
00536 mov ecx, eax ;V
00537 and ebx, eax ;U ebx = (dst&3)
00538 inc ecx ;V
00539 sub ecx, ebx ; ecx = (4-(dst&3))
00540 and ecx, eax ; ecx = {dst%4 0->0 1->3 2->2 3->1}
00541 sub edx, ecx ;
00542 rep movsb ;N BYTE MOVE (align dst)
00543
00544 mov ecx, edx ;
00545 shr ecx, 2 ; ecx = [rest bytes]/4
00546 and edx, eax ; edx = [rest bytes]%4
00547 rep movsd ;N DWORD MOVE
00548 jmp MiniCopy ;
00549
00550 CopyDown:
00551 std ;
00552 lea esi,[esi+edx-1] ;
00553 lea edi,[edi+edx-1] ;
00554
00555 cmp edx, 4 ; if( cnt<=4 )
00556 jbe MiniCopy ; byte by byte copy
00557
00558 mov ecx, edi ;
00559 and ecx, eax ;
00560 inc ecx ; ecx = {dst%4 0->1 1->2 2->3 3->4}
00561 sub edx, ecx ;
00562 rep movsb ;N BYTE MOVE (align dst @ dword)
00563
00564 sub edi, eax ;U
00565 mov ecx, edx ;V
00566 sub esi, eax ;U
00567 shr ecx, 2 ;V ecx = [rest bytes]/4
00568 and edx, eax ; edx = [rest bytes]%4
00569 rep movsd ;N DWORD MOVE
00570 add edi, eax ;U
00571 add esi, eax ;V
00572
00573 MiniCopy:
00574 mov ecx, edx ;
00575 rep movsb ;N BYTE MOVE
00576
00577 cld ;U
00578 mov eax, [dst] ;V return dst
00579 }
00580 }
00581
00582
00583
00584 #pragma warning( default : 4035 )
00585
00586 #else
00587
00588
00590
00591
/*
 * Portable overlap-safe copy of `count` bytes from `src` to `dest`.
 * Returns `dest`.
 *
 * When `dest` starts inside [src, src + count) the bytes are copied from
 * the last to the first so that no source byte is overwritten before it
 * has been read; in every other case they are copied front to back.
 *
 * This replaces an implementation that tried to stage the overlap in a
 * heap buffer: it saved bytes from `dest` rather than `src`, computed
 * `count - overlap` with an overlap that could exceed `count`, and used
 * the malloc() result unchecked.  No allocation is needed.
 */
void *dkcstd_memmove( void *dest, const void *src, size_t count ){
	unsigned char *d = (unsigned char *)dest;
	const unsigned char *s = (const unsigned char *)src;
	size_t i;

	if(d == s || 0 == count){
		return dest;
	}

	/* Addresses are compared as integers, as elsewhere in this file. */
	if((size_t)d < (size_t)s || ((size_t)d - (size_t)s) >= count){
		/* dest does not start inside the source range: forward copy */
		for(i=0;i<count;i++){
			d[i] = s[i];
		}
	}else{
		/* src < dest < src + count: backward copy */
		for(i=count;i!=0;i--){
			d[i-1] = s[i-1];
		}
	}
	return dest;
}
00633
00634 #endif
00635
/*
 * Copy `count` bytes from `src` to `dest` one machine word (size_t) at a
 * time, then the leftover bytes individually.  Returns `dest`.
 *
 * The word loop is used only when both pointers are multiples of
 * sizeof(size_t); otherwise every byte is copied individually.  The
 * previous version dereferenced size_t pointers regardless of alignment,
 * which is undefined behaviour and faults on strict-alignment targets.
 * The bytes written are the same in both cases.
 */
void *dkcstd_memcpy32( void *dest, const void *src, size_t count ){
	unsigned char *db = (unsigned char *)dest;
	const unsigned char *sb = (const unsigned char *)src;
	size_t done = 0;
	size_t i;

	if(0 == ((size_t)dest % sizeof(size_t)) && 0 == ((size_t)src % sizeof(size_t))){
		size_t *dw = (size_t *)dest;
		const size_t *sw = (const size_t *)src;
		size_t words = count / sizeof(size_t);

		for(i=0;i<words;i++){
			dw[i] = sw[i];
		}
		done = words * sizeof(size_t);
	}

	/* tail (or the whole buffer when the pointers are unaligned) */
	for(i=done;i<count;i++){
		db[i] = sb[i];
	}
	return dest;
}
00655
/*
 * Copy `count` bytes from `src` to `dest` in word-sized chunks, then the
 * leftover bytes individually.  Returns `dest`.
 *
 * The previous version moved the data through `double` objects.  A
 * floating-point load/store is not guaranteed to preserve every bit
 * pattern (on x87 a signalling NaN is quieted on the way through), and
 * the `double *` accesses ignored alignment.  The chunks are now copied
 * as size_t integers, which is always bit-exact, and only when both
 * pointers are suitably aligned; otherwise the copy is done bytewise.
 * The helper macro that used to be #defined inside the body (and leaked
 * into the rest of the file) is gone.
 */
void *dkcstd_memcpy64f( void *dest, const void *src, size_t count ){
	unsigned char *db = (unsigned char *)dest;
	const unsigned char *sb = (const unsigned char *)src;
	size_t done = 0;
	size_t i;

	if(0 == ((size_t)dest % sizeof(size_t)) && 0 == ((size_t)src % sizeof(size_t))){
		size_t *dw = (size_t *)dest;
		const size_t *sw = (const size_t *)src;
		size_t words = count / sizeof(size_t);

		for(i=0;i<words;i++){
			dw[i] = sw[i];
		}
		done = words * sizeof(size_t);
	}

	/* tail (or the whole buffer when the pointers are unaligned) */
	for(i=done;i<count;i++){
		db[i] = sb[i];
	}
	return dest;
}
00675
/*
 * Copy `count` bytes from `src` to `dest` in 64-bit chunks.  Returns
 * `dest`.
 *
 * Without WIN32 the work is delegated to dkcstd_memcpy64f().  With WIN32
 * the data is moved as ULONGLONG values when both pointers are multiples
 * of sizeof(ULONGLONG), and bytewise otherwise; leftover bytes are always
 * copied individually.
 *
 * Compared with the previous version the chunk type is named directly
 * instead of through a macro that was #undef'd/#defined inside the
 * function body and stayed defined for the rest of the file, and
 * misaligned pointers no longer go through ULONGLONG accesses.
 */
void *dkcstd_memcpy64( void *dest, const void *src, size_t count ){
#ifndef WIN32
	return dkcstd_memcpy64f(dest,src,count);
#else
	unsigned char *db = (unsigned char *)dest;
	const unsigned char *sb = (const unsigned char *)src;
	size_t done = 0;
	size_t i;

	if(0 == ((size_t)dest % sizeof(ULONGLONG)) && 0 == ((size_t)src % sizeof(ULONGLONG))){
		ULONGLONG *dq = (ULONGLONG *)dest;
		const ULONGLONG *sq = (const ULONGLONG *)src;
		size_t chunks = count / sizeof(ULONGLONG);

		for(i=0;i<chunks;i++){
			dq[i] = sq[i];
		}
		done = chunks * sizeof(ULONGLONG);
	}

	/* tail (or the whole buffer when the pointers are unaligned) */
	for(i=done;i<count;i++){
		db[i] = sb[i];
	}
	return dest;
#endif
}
00701
/*
 * Default memcpy of this module: forwards to dkcstd_memcpy64() and
 * returns whatever it returns.
 */
void *dkcstd_memcpy( void *dest, const void *src, size_t count ){
	void *result = dkcstd_memcpy64(dest,src,count);

	return result;
}
00705
00706
/*
 * Fill `count` bytes starting at `dest` with the low 8 bits of `c`.
 * Returns `dest`.
 *
 * Leading bytes are written one at a time until the address is a
 * multiple of sizeof(size_t), the bulk is written as whole size_t words
 * holding the fill byte in every position, and the remaining bytes are
 * written one at a time.
 *
 * The previous version counted words in 4-byte units but stored them
 * through a size_t pointer, so on targets with 8-byte size_t it wrote
 * past the end of the buffer and left zero bytes in the pattern; it also
 * shifted a promoted int left by 24 (signed overflow for c >= 0x80) and
 * ignored alignment.
 */
void *dkcstd_memset( void *dest, int c, size_t count ){
	unsigned char *out = (unsigned char *)dest;
	const unsigned char fill = (unsigned char)c;
	/* (size_t)-1 / 0xFF is 0x0101...01; multiplying repeats `fill` in every byte */
	const size_t pattern = ((size_t)-1 / 0xFF) * (size_t)fill;
	size_t left = count;
	size_t words;
	size_t i;
	size_t *w;

	/* head: single bytes up to the next word boundary */
	while(left != 0 && ((size_t)out % sizeof(size_t)) != 0){
		*out++ = fill;
		left--;
	}

	/* body: whole words */
	w = (size_t *)out;
	words = left / sizeof(size_t);
	for(i=0;i<words;i++){
		w[i] = pattern;
	}
	out += words * sizeof(size_t);
	left -= words * sizeof(size_t);

	/* tail: fewer than sizeof(size_t) bytes remain */
	while(left != 0){
		*out++ = fill;
		left--;
	}
	return dest;
}
00720
/*
 * Compare the first `count` bytes of `buf1` and `buf2`.
 *
 * Returns 0 when all bytes are equal; otherwise the difference between
 * the first pair of differing bytes, both taken as unsigned values
 * (negative when buf1 sorts first, positive when buf2 sorts first).
 *
 * When both pointers are multiples of sizeof(size_t) the equal prefix is
 * skipped a word at a time; the deciding byte is always located with a
 * bytewise scan starting at the first unequal word.
 *
 * The previous version, on finding unequal words, subtracted the two
 * words' leading `int`s (through an incompatible pointer type).  With an
 * 8-byte size_t that could report 0 for buffers differing only in the
 * upper half of a word, and in general the sign did not follow byte
 * order.  It also dereferenced size_t pointers regardless of alignment
 * and re-scanned the whole buffer after the word loop.
 */
int dkcstd_memcmp( const void *buf1, const void *buf2, size_t count ){
	const unsigned char *a = (const unsigned char *)buf1;
	const unsigned char *b = (const unsigned char *)buf2;
	size_t i = 0;

	if(0 == ((size_t)buf1 % sizeof(size_t)) && 0 == ((size_t)buf2 % sizeof(size_t))){
		const size_t *wa = (const size_t *)buf1;
		const size_t *wb = (const size_t *)buf2;
		size_t words = count / sizeof(size_t);
		size_t w = 0;

		while(w < words && wa[w] == wb[w]){
			w++;
		}
		/* resume bytewise at the first unequal word (or at the tail) */
		i = w * sizeof(size_t);
	}

	for(;i<count;i++){
		if(a[i] != b[i]){
			return (int)a[i] - (int)b[i];
		}
	}
	return 0;
}
00748
00749
00750
/*
 * Default memchr of this module: forwards to dkcstd_memchr8() and
 * returns its result (pointer to the first byte equal to `c` within the
 * first `count` bytes of `buf`, or NULL).
 */
void *dkcstd_memchr( const void *buf, int c, size_t count ){
	void *hit = dkcstd_memchr8( buf, c, count );

	return hit;
}
00754
00755
00756
00757