More clobber register corrections.
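Every hunk below makes the same kind of fix to GCC inline assembly: memory the statement reads stays an "m" input, memory it writes becomes a real "=m" output, and the clobber list names only the registers the code actually touches, instead of listing operand numbers ("0", "1", "2") as if they were clobbered registers. A minimal before/after sketch of that pattern (editor's illustration, not code from this commit; assumes GCC-style inline asm on x86 with MMX available):

#include <stdint.h>

typedef uint64_t uint64;

static void
copy_one_quad (const uint64 *a, uint64 *d)
{
#if 0
  /* Old shape: destination passed as a plain input, and operand numbers
   * ("0", "1") listed as clobbers.  Operand numbers are not register
   * names, so the compiler cannot honour them as clobbers.             */
  asm volatile ("movq %0, %%mm2\n\t"
                "movq %%mm2, %1\n\t"
                : /* empty */
                : "m" (*a), "m" (*d)
                : "0", "1", "%mm2");
#else
  /* Corrected shape: the destination is a real "=m" output and the
   * clobber list names only what the asm really modifies.              */
  asm volatile ("movq %1, %%mm2\n\t"
                "movq %%mm2, %0\n\t"
                : "=m" (*d)
                : "m" (*a)
                : "%mm2");
#endif
  asm volatile ("emms");   /* leave the FPU/MMX state clean */
}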
@@ -1,3 +1,11 @@
+2004-07-19 Helvetix Victorinox <helvetix@gimp.org>
+
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_difference_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_darken_rgba8_rgba8_rgba8_mmx):
+          More clobber register corrections.
+
 2004-07-20 Sven Neumann <sven@gimp.org>
 
         * Made 2.1.2 release.
@@ -85,13 +85,14 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0"
                 : /* empty */
                 : "m" (*rgba8_alpha_mask)
                 : "%mm0");
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
@@ -111,7 +112,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       d++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile (" movd %1, %%mm2\n"
                     "\tmovd %2, %%mm3\n"
@@ -137,8 +138,9 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1,%%mm0\n"
                     "\tmovq %2,%%mm1\n"
@@ -192,7 +194,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       a++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile (" movd %1,%%mm0\n"
                     "\tmovd %2,%%mm1\n"
@@ -250,31 +252,34 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile (" movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
                     "\tmovq %%mm2, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                     : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels > 0)
     {
-      asm volatile (" movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile (" movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
-                    "\tmovd %%mm2, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm2, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm2", "%mm3", "%mm4");
     }
 
   asm("emms");
@@ -283,14 +288,17 @@ gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0, %%mm2\n"
-                    "\tmovq %1, %%mm3\n"
+      asm volatile (" movq %1, %%mm2\n"
+                    "\tmovq %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -301,19 +309,19 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovq %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
-      asm volatile (" movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile (" movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -324,10 +332,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovd %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
     }
 
   asm("emms");
@@ -336,7 +344,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile (" movq %0, %%mm0\n"
                 "\tmovq %1, %%mm7\n"
@@ -344,10 +355,10 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                 : "m" (*rgba8_alpha_mask), "m" (*rgba8_w1)
                 : "%mm0", "%mm7");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile (" movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
 
@@ -379,19 +390,19 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
 
-                    "\tmovq %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                    "\tmovq %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
                     : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
-      asm volatile (" movd %0,%%mm0\n"
-                    "\tmovd %1,%%mm1\n"
+      asm volatile (" movd %1,%%mm0\n"
+                    "\tmovd %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
 
@@ -423,9 +434,9 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
 
-                    "\tmovd %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                    "\tmovd %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
                     : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
@@ -435,12 +446,15 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile (" movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile (" movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tmovq %%mm1,%%mm3\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm2,%%mm3\n"
@@ -471,16 +485,16 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tpor %%mm6,%%mm7\n"
 
-                    "\tmovq %%mm7,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm7,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
       asm volatile (" movd %0,%%mm0\n"
                     "\tmovq %1,%%mm1\n"
@@ -516,8 +530,8 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tmovd %%mm7,%2\n"
                     : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+                    : "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
   asm("emms");
@@ -117,6 +117,7 @@
                   "movd %%eax,%%" #divisor ";" \
                   "psllq $32,%%" #divisor ";" \
                   "por %%" #divisor ",%%" #quotient ";"
+#define pdivwuqX_clobber pdivwqX_clobber
 
 #define xmm_pdivwqX(dividend,divisor,quotient,scratch) "movd %%" #dividend ",%%eax; " \
                   "movd %%" #divisor ",%%ecx; " \
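The header hunk above defines pdivwuqX_clobber so that asm statements using the pdivwqX/pdivwuqX division helpers can take the helpers' scratch registers from one macro instead of spelling them out in every clobber list; the quoted macro fragments show the helpers moving operands through %eax and %ecx. A sketch of the intended usage follows; the expansion of pdivwqX_clobber shown here is an assumption, not something this diff contains:

/* Assumed expansion; the real definition lives outside the quoted hunk. */
#define pdivwqX_clobber  "%eax", "%ecx", "%edx"
#define pdivwuqX_clobber pdivwqX_clobber

/* A caller's clobber list such as
 *
 *     : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
 *
 * then expands to name %eax, %ecx and %edx alongside the MMX registers,
 * keeping the list in step with what the division helper touches.       */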