More clobber register corrections.

Helvetix Victorinox
2004-07-20 06:33:03 +00:00
parent 49fa9b7589
commit d02bc5453b
3 changed files with 89 additions and 66 deletions

ChangeLog

@@ -1,3 +1,11 @@
+2004-07-19  Helvetix Victorinox  <helvetix@gimp.org>
+
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_difference_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_darken_rgba8_rgba8_rgba8_mmx):
+        More clobber register corrections.
+
 2004-07-20  Sven Neumann  <sven@gimp.org>
 
         * Made 2.1.2 release.
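
All four corrections follow the same GCC extended-asm rule: every register the template writes must either be declared as an output operand or listed as clobbered, and a memory destination belongs in the output list as "=m" rather than in the input list as "m". Purely as a point of reference (this helper is not part of the commit; the name, the <stdint.h> types and the standalone form are illustrative assumptions), the corrected darken pattern looks roughly like this:

#include <stdint.h>

/* Hypothetical standalone sketch, not from the GIMP tree: byte-wise
 * minimum ("darken") of two RGBA8 pixel pairs.  The destination is a
 * "=m" output, and every MMX register the template writes appears in
 * the clobber list, so the compiler cannot assume those registers
 * still hold earlier values.  The caller is expected to issue emms
 * afterwards, as the GIMP functions do. */
static inline void
darken_two_pixels (uint64_t *d, const uint64_t *a, const uint64_t *b)
{
  asm volatile (" movq %1, %%mm2\n"
                "\tmovq %2, %%mm3\n"
                "\tmovq %%mm2, %%mm4\n"
                "\tpsubusb %%mm3, %%mm4\n"  /* mm4 = max (A - B, 0)        */
                "\tpsubb %%mm4, %%mm2\n"    /* mm2 = A - mm4 = min (A, B)  */
                "\tmovq %%mm2, %0\n"
                : "=m" (*d)                 /* %0: destination, written    */
                : "m" (*a), "m" (*b)        /* %1, %2: sources, read only  */
                : "%mm2", "%mm3", "%mm4");  /* MMX registers overwritten   */
}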

app/composite/gimp-composite-mmx.c

@@ -85,13 +85,14 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0"
                 : /* empty */
                 : "m" (*rgba8_alpha_mask)
                 : "%mm0");
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
@@ -111,7 +112,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       d++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile (" movd %1, %%mm2\n"
                     "\tmovd %2, %%mm3\n"
@@ -137,8 +138,9 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1,%%mm0\n"
                     "\tmovq %2,%%mm1\n"
@@ -192,7 +194,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       a++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile (" movd %1,%%mm0\n"
                     "\tmovd %2,%%mm1\n"
@@ -250,31 +252,34 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
                     "\tmovq %%mm2, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                     : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels > 0)
     {
-      asm volatile (" movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile (" movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
-                    "\tmovd %%mm2, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm2, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm2", "%mm3", "%mm4");
     }
 
   asm("emms");
@@ -283,14 +288,17 @@ gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0, %%mm2\n"
-                    "\tmovq %1, %%mm3\n"
+      asm volatile (" movq %1, %%mm2\n"
+                    "\tmovq %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -301,19 +309,19 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovq %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
-      asm volatile (" movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile (" movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -324,10 +332,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovd %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
     }
 
   asm("emms");
@@ -336,7 +344,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile (" movq %0, %%mm0\n"
                 "\tmovq %1, %%mm7\n"
@@ -344,10 +355,10 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                 : "m" (*rgba8_alpha_mask), "m" (*rgba8_w1)
                 : "%mm0", "%mm7");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile (" movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
@@ -379,19 +390,19 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
-                    "\tmovq %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                    "\tmovq %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
                     : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
-      asm volatile (" movd %0,%%mm0\n"
-                    "\tmovd %1,%%mm1\n"
+      asm volatile (" movd %1,%%mm0\n"
+                    "\tmovd %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
@@ -423,9 +434,9 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
-                    "\tmovd %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                    "\tmovd %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
                     : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
@@ -435,12 +446,15 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile (" movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tmovq %%mm1,%%mm3\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm2,%%mm3\n"
@@ -471,16 +485,16 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tpor %%mm6,%%mm7\n"
-                    "\tmovq %%mm7,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm7,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
       asm volatile (" movd %0,%%mm0\n"
                     "\tmovq %1,%%mm1\n"
@@ -516,8 +530,8 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tmovd %%mm7,%2\n"
                     : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+                    : "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
   asm("emms");


@@ -117,6 +117,7 @@
                                     "movd %%eax,%%" #divisor ";" \
                                     "psllq $32,%%" #divisor ";" \
                                     "por %%" #divisor ",%%" #quotient ";"
+#define pdivwuqX_clobber pdivwqX_clobber
 
 #define xmm_pdivwqX(dividend,divisor,quotient,scratch) "movd %%" #dividend ",%%eax; " \
                                                        "movd %%" #divisor ",%%ecx; " \