Skip to content

Commit

Permalink
Renamed all __out variables.
Browse files — browse the repository at this point in the history
  • Loading branch information
tdulcet committed Feb 17, 2024
1 parent 014481b commit ba0d321
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 45 deletions.
10 changes: 5 additions & 5 deletions src/radix24_ditN_cy_dif1_gcc32.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"addl %%eax,%%ebx /* eax <- add0+p05 */\n\t"\
"addl %%eax,%%ecx /* ebx <- add0+p06 */\n\t"\
"addl %%eax,%%edx /* ecx <- add0+p07 */\n\t"\
"movl %[__out],%%esi /* s1p00r */\n\t"\
"movl %[___out],%%esi /* s1p00r */\n\t"\
"/* MSVC macro assumes add8+p[7,6,5,4] in eax,ebx,ecx,edx, but here get add0+p[4,5,6,7], so replace eax <-> edx and ebx <-> ecx: */\n\t"\
"/* Do the p0,p4 combo: */\n\t"\
"movaps (%%edx),%%xmm0 \n\t"\
Expand Down Expand Up @@ -903,7 +903,7 @@
,[__p04] "m" (Xp04)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","eax",/*"ebx",*/"ecx","edx","edi","esi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand All @@ -914,7 +914,7 @@
{\
__asm__ volatile (\
"pushl %%ebx \n\t"/* Explicit save/restore of PIC register */\
"movl %[__out],%%eax /* s1p00r */\n\t"\
"movl %[___out],%%eax /* s1p00r */\n\t"\
"movl %%eax,%%ebx /* s1p00r */\n\t"\
"movl %%eax,%%ecx /* s1p08r */\n\t"\
"movl %[__cc3],%%edx \n\t"\
Expand Down Expand Up @@ -1282,7 +1282,7 @@
"\n\t"\
"/* For the radix-8 DIF DFTs, the input offsets always have the same pattern; outputs are permuted */\n\t"\
"/* SSE2_RADIX8_DIF_0TWIDDLE( i[0-7] = s1p00r + 0x[0a4e82c6]0, o[0-7] = add0 + p[01235476]) */\n\t"\
"movl %[__out],%%eax /* i0 = s1p00r */\n\t"\
"movl %[___out],%%eax /* i0 = s1p00r */\n\t"\
"movl $0x40 ,%%ebx /* i2 */ \n\t"\
"movl $0x80 ,%%ecx /* i4 */ \n\t"\
"movl $0xc0 ,%%edx /* i6 */ \n\t"\
Expand Down Expand Up @@ -1911,7 +1911,7 @@
,[__p07] "m" (Xp07)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","eax",/*"ebx",*/"ecx","edx","edi","esi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand Down
20 changes: 10 additions & 10 deletions src/radix24_ditN_cy_dif1_gcc64.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"leaq (%%rax,%%rbx,8),%%rbx /* eax <- add0+p05 */\n\t"\
"leaq (%%rax,%%rcx,8),%%rcx /* ebx <- add0+p06 */\n\t"\
"leaq (%%rax,%%rdx,8),%%rdx /* ecx <- add0+p07 */\n\t"\
"movq %[__out],%%rsi /* s1p00r */\n\t"\
"movq %[___out],%%rsi /* s1p00r */\n\t"\
"/* MSVC macro has add8+p[7654] in abcd, here add0+p[4567], so swap a/d and b/c: */\n\t"\
"/* Do the p0,p4 combo: */\n\t"\
"vmovaps (%%rdx),%%ymm0 \n\t"\
Expand Down Expand Up @@ -786,7 +786,7 @@
,[__p04] "m" (Xp04)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand All @@ -796,7 +796,7 @@
#define SSE2_RADIX24_DIF_NOTWIDDLE(Xadd,Xp01,Xp02,Xp03,Xp04,Xp05,Xp06,Xp07,Xp08,Xp16,Xout,Xisrt2,Xcc3)\
{\
__asm__ volatile (\
"movq %[__out],%%rax /* s1p00r */\n\t"\
"movq %[___out],%%rax /* s1p00r */\n\t"\
"movq %%rax,%%rbx /* s1p00r */\n\t"\
"movq %%rax,%%rcx /* s1p08r */\n\t"\
"movq %[__cc3],%%rdx \n\t"\
Expand Down Expand Up @@ -1107,7 +1107,7 @@
"movq %[__isrt2],%%rsi \n\t"\
"/* For the radix-8 DIF DFTs, the input offsets always have the same pattern; outputs are permuted */\n\t"\
"/* SSE2_RADIX8_DIF_0TWIDDLE( i[0-7] = s1p00r + 0x[0a4e82c6]0, o[0-7] = add0 + p[01235476]) */\n\t"\
"movq %[__out],%%rax /* i0 = s1p00r */\n\t"\
"movq %[___out],%%rax /* i0 = s1p00r */\n\t"\
"movq $0x080 ,%%rbx /* i2 */ \n\t"\
"movq $0x100 ,%%rcx /* i4 */ \n\t"\
"movq $0x180 ,%%rdx /* i6 */ \n\t"\
Expand Down Expand Up @@ -1649,7 +1649,7 @@
,[__p07] "m" (Xp07)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand All @@ -1673,7 +1673,7 @@
"leaq (%%rax,%%rbx,8),%%rbx /* eax <- add0+p05 */\n\t"\
"leaq (%%rax,%%rcx,8),%%rcx /* ebx <- add0+p06 */\n\t"\
"leaq (%%rax,%%rdx,8),%%rdx /* ecx <- add0+p07 */\n\t"\
"movq %[__out],%%rsi /* s1p00r */\n\t"\
"movq %[___out],%%rsi /* s1p00r */\n\t"\
"/* MSVC macro assumes add8+p[7654] in abcd, but here get add0+p[4,5,6,7], so swap a/d and b/c: */\n\t"\
"/* Do the p0,p4 combo: */\n\t"\
"movaps (%%rdx),%%xmm0 \n\t"\
Expand Down Expand Up @@ -2416,7 +2416,7 @@
,[__p04] "m" (Xp04)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand All @@ -2426,7 +2426,7 @@
#define SSE2_RADIX24_DIF_NOTWIDDLE(Xadd,Xp01,Xp02,Xp03,Xp04,Xp05,Xp06,Xp07,Xp08,Xp16,Xout,Xisrt2,Xcc3)\
{\
__asm__ volatile (\
"movq %[__out],%%rax /* s1p00r */\n\t"\
"movq %[___out],%%rax /* s1p00r */\n\t"\
"movq %%rax,%%rbx /* s1p00r */\n\t"\
"movq %%rax,%%rcx /* s1p08r */\n\t"\
"movq %[__cc3],%%rdx \n\t"\
Expand Down Expand Up @@ -2737,7 +2737,7 @@
"movq %[__isrt2],%%rsi \n\t"\
"/* For the radix-8 DIF DFTs, the input offsets always have the same pattern; outputs are permuted */\n\t"\
"/* SSE2_RADIX8_DIF_0TWIDDLE( i[0-7] = s1p00r + 0x[0a4e82c6]0, o[0-7] = add0 + p[01235476]) */\n\t"\
"movq %[__out],%%rax /* i0 = s1p00r */\n\t"\
"movq %[___out],%%rax /* i0 = s1p00r */\n\t"\
"movq $0x40 ,%%rbx /* i2 */ \n\t"\
"movq $0x80 ,%%rcx /* i4 */ \n\t"\
"movq $0xc0 ,%%rdx /* i6 */ \n\t"\
Expand Down Expand Up @@ -3279,7 +3279,7 @@
,[__p07] "m" (Xp07)\
,[__p08] "m" (Xp08)\
,[__p16] "m" (Xp16)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
,[__cc3] "m" (Xcc3)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
Expand Down
12 changes: 6 additions & 6 deletions src/radix28_ditN_cy_dif1_gcc32.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
__asm__ volatile (\
"pushl %%ebx \n\t"/* Explicit save/restore of PIC register */\
"/* SSE2_RADIX4_DIT_0TWIDDLE_STRIDE_E(add00+p[0,1,3,2], s1p00r,s1p03r,s1p02r,s1p01r): */\n\t"\
"movl %[__out],%%esi /* s1p00r */\n\t"\
"movl %[___out],%%esi /* s1p00r */\n\t"\
"movl %[__add],%%eax\n\t"\
"movl %[__p01],%%ebx\n\t"\
"movl %[__p02],%%ecx\n\t"\
Expand Down Expand Up @@ -394,7 +394,7 @@
"t1r=A1r+A6r; t2r=A2r+A5r; t3r=A3r+A4r;\n\t"\
"t6r=A1r-A6r; t5r=A2r-A5r; t4r=A3r-A4r;\n\t"\
"*/\n\t"\
"movl %[__out],%%edi /* s1p00r */\n\t"\
"movl %[___out],%%edi /* s1p00r */\n\t"\
"movl %[__cc0],%%eax /* cc0 */\n\t"\
"movl $0x20,%%ebx\n\t"\
"movl $0x40,%%ecx\n\t"\
Expand Down Expand Up @@ -1155,7 +1155,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","eax",/*"ebx",*/"ecx","edx","edi","esi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
);\
Expand All @@ -1167,7 +1167,7 @@
__asm__ volatile (\
"pushl %%ebx \n\t"/* Explicit save/restore of PIC register */\
"/* SSE2_RADIX_07_DFT(s1p00r,s1p24r,s1p20r,s1p16r,s1p12r,s1p08r,s1p04r,cc0,s1p00r,s1p04r,s1p08r,s1p12r,s1p16r,s1p20r,s1p24r): */\n\t"\
"movl %[__out],%%edi /* s1p00r */\n\t"\
"movl %[___out],%%edi /* s1p00r */\n\t"\
"movl %[__cc0],%%eax /* cc0 */\n\t"\
"movl $0x20,%%ebx\n\t"\
"movl $0x40,%%ecx\n\t"\
Expand Down Expand Up @@ -1882,7 +1882,7 @@
"/* Since doing radix-7 in-place here, inputs of radix-4 are in consecutive memory locs, i.e. 0x20 bytes apart, e.g. the distance between s1p00r and s1p01r: */\n\t"\
"\n\t"\
"/* SSE2_RADIX4_DIF_0TWIDDLE_STRIDE_E(s1p00r,s1p01r,s1p02r,s1p03r, add00+p[0,1,2,3]): */\n\t"\
"movl %[__out],%%esi /* s1p00r */\n\t"\
"movl %[___out],%%esi /* s1p00r */\n\t"\
"movl %[__add],%%eax\n\t"\
"movl %[__p01],%%ebx\n\t"\
"movl %[__p02],%%ecx\n\t"\
Expand Down Expand Up @@ -2255,7 +2255,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","eax",/*"ebx",*/"ecx","edx","edi","esi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
);\
Expand Down
24 changes: 12 additions & 12 deletions src/radix28_ditN_cy_dif1_gcc64.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
{\
__asm__ volatile (\
"/* 1:SSE2_RADIX4_DIT_0TWIDDLE_STRIDE(add00+p0132, s1p00,03,02,01): */ \n\t"\
"movq %[__out],%%rsi \n\t"\
"movq %[___out],%%rsi \n\t"\
"movq %[__add],%%rax \n\t"\
"movslq %[__p01],%%rbx \n\t"\
"movslq %[__p02],%%rcx \n\t"\
Expand Down Expand Up @@ -222,7 +222,7 @@
"/*...and now do 4 radix-7 transforms...*/ \n\t"\
"/***************************************/ \n\t"\
"/* SSE2_RADIX_07_DFT(00,04,08,12,16,20,24 -> 00,08,16,24,04,12,20): */ \n\t"\
"movq %[__out],%%rdi \n\t"\
"movq %[___out],%%rdi \n\t"\
"movq %[__cc0],%%rax \n\t"\
"movq $0x040,%%rbx \n\t"\
"movq $0x080,%%rcx \n\t"\
Expand Down Expand Up @@ -529,7 +529,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" /* Clobbered registers */\
);\
Expand All @@ -539,7 +539,7 @@
{\
__asm__ volatile (\
"/* SSE2_RADIX_07_DFT(00,24,20,16,12,08,04r -> 00,04,08,12,16,20,24): */\n\t"\
"movq %[__out],%%rdi \n\t"\
"movq %[___out],%%rdi \n\t"\
"movq %[__cc0],%%rax \n\t"\
"movq $0x040,%%rbx \n\t"\
"movq $0x080,%%rcx \n\t"\
Expand Down Expand Up @@ -839,7 +839,7 @@
"/* Since doing radix-7 in-place here, inputs of radix-4 are in consecutive memory locs, i.e. 0x40 bytes apart, e.g. the distance between s1p00r and s1p01r: */\n\t"\
"\n\t"\
"/* 1:SSE2_RADIX4_DIF_0TWIDDLE_STRIDE_E(s1p00,01,02,03 -> add00+p0123): */ \n\t"\
"movq %[__out],%%rsi /* s1p00r */ \n\t"\
"movq %[___out],%%rsi /* s1p00r */ \n\t"\
"movq %[__add],%%rax \n\t"\
"movslq %[__p01],%%rbx \n\t"\
"movslq %[__p02],%%rcx \n\t"\
Expand Down Expand Up @@ -1032,7 +1032,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" /* Clobbered registers */\
);\
Expand All @@ -1044,7 +1044,7 @@
{\
__asm__ volatile (\
"/* 1:SSE2_RADIX4_DIT_0TWIDDLE_STRIDE(add00+p0132, s1p00,03,02,01): */ \n\t"\
"movq %[__out],%%rsi \n\t"\
"movq %[___out],%%rsi \n\t"\
"movq %[__add],%%rax \n\t"\
"movslq %[__p01],%%rbx \n\t"\
"movslq %[__p02],%%rcx \n\t"\
Expand Down Expand Up @@ -1234,7 +1234,7 @@
"/*...and now do 4 radix-7 transforms...*/ \n\t"\
"/***************************************/ \n\t"\
"/* SSE2_RADIX_07_DFT(00,04,08,12,16,20,24 -> 00,08,16,24,04,12,20): */ \n\t"\
"movq %[__out],%%rdi \n\t"\
"movq %[___out],%%rdi \n\t"\
"movq %[__cc0],%%rax \n\t"\
"movq $0x020,%%rbx \n\t"\
"movq $0x040,%%rcx \n\t"\
Expand Down Expand Up @@ -1541,7 +1541,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" /* Clobbered registers */\
);\
Expand All @@ -1551,7 +1551,7 @@
{\
__asm__ volatile (\
"/* SSE2_RADIX_07_DFT(00,24,20,16,12,08,04r -> 00,04,08,12,16,20,24): */\n\t"\
"movq %[__out],%%rdi \n\t"\
"movq %[___out],%%rdi \n\t"\
"movq %[__cc0],%%rax \n\t"\
"movq $0x020,%%rbx \n\t"\
"movq $0x040,%%rcx \n\t"\
Expand Down Expand Up @@ -1851,7 +1851,7 @@
"/* Since doing radix-7 in-place here, inputs of radix-4 are in consecutive memory locs, i.e. 0x20 bytes apart, e.g. the distance between s1p00r and s1p01r: */\n\t"\
"\n\t"\
"/* 1:SSE2_RADIX4_DIF_0TWIDDLE_STRIDE_E(s1p00,01,02,03 -> add00+p0123): */ \n\t"\
"movq %[__out],%%rsi /* s1p00r */ \n\t"\
"movq %[___out],%%rsi /* s1p00r */ \n\t"\
"movq %[__add],%%rax \n\t"\
"movslq %[__p01],%%rbx \n\t"\
"movslq %[__p02],%%rcx \n\t"\
Expand Down Expand Up @@ -2044,7 +2044,7 @@
,[__p16] "m" (Xp16)\
,[__p20] "m" (Xp20)\
,[__p24] "m" (Xp24)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__cc0] "m" (Xcc0)\
: "cc","memory","rax","rbx","rcx","rdx","rdi","rsi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" /* Clobbered registers */\
);\
Expand Down
4 changes: 2 additions & 2 deletions src/sse2_macro_gcc32.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -1329,7 +1329,7 @@ We use shufpd xmm, xmm, 1 to swap lo and hi doubles of an xmm register for the v
"movl %[__in5],%%ebx \n\t"\
"movl %[__in6],%%ecx \n\t"\
"movl %[__in7],%%edx \n\t"\
"movl %[__out],%%esi \n\t"\
"movl %[___out],%%esi \n\t"\
"movaps (%%eax),%%xmm0 \n\t"\
"movaps 0x10(%%eax),%%xmm1 \n\t"\
"movaps %%xmm0,%%xmm2 \n\t"\
Expand Down Expand Up @@ -1476,7 +1476,7 @@ We use shufpd xmm, xmm, 1 to swap lo and hi doubles of an xmm register for the v
,[__in5] "m" (Xin5)\
,[__in6] "m" (Xin6)\
,[__in7] "m" (Xin7)\
,[__out] "m" (Xout)\
,[___out] "m" (Xout)\
,[__isrt2] "m" (Xisrt2)\
: "cc","memory","eax",/*"ebx",*/"ecx","edx","edi","esi","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7" /* Clobbered registers */\
);\
Expand Down
Loading

0 comments on commit ba0d321

Please sign in to comment.