Regenerated.
* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated. * app/composite/gimp-composite-regression.[ch]: Ensure all ints are now unsigned longs. * app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2): Enclosed the troublesome (rather dense) asm construct in an #ifdef __OPTIMIZE__ block, in which case the compiler can successfully allocate enough registers to load up the asm().
This commit is contained in:
27
ChangeLog
27
ChangeLog
@ -1,3 +1,19 @@
|
|||||||
|
2003-09-16 Helvetix Victorinox <helvetix@gimp.org>
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated.
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-regression.[ch]: Ensure all ints
|
||||||
|
are now unsigned longs.
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2):
|
||||||
|
Enclosed the troublesome (rather dense) asm construct in
|
||||||
|
__OPTIMIZE__ in which case the compiler can successfully
|
||||||
|
allocate enough registers to load up the asm()
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-sse2.c: Removed code that caused
|
||||||
|
gcc to complain can't find a register in class `GENERAL_REGS' To
|
||||||
|
be revisited later.
|
||||||
|
|
||||||
2003-09-17 Seth Burgess <sjburges@gimp.org>
|
2003-09-17 Seth Burgess <sjburges@gimp.org>
|
||||||
|
|
||||||
* plug-ins/common/sparkle.c: use the rowstride of a pixel region
|
* plug-ins/common/sparkle.c: use the rowstride of a pixel region
|
||||||
@ -71,10 +87,21 @@
|
|||||||
|
|
||||||
2003-09-16 Helvetix Victorinox <helvetix@gimp.org>
|
2003-09-16 Helvetix Victorinox <helvetix@gimp.org>
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated.
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-regression.[ch]: Ensure all ints
|
||||||
|
are now unsigned longs.
|
||||||
|
|
||||||
|
* app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2):
|
||||||
|
Enclosed the troublesome (rather dense) asm construct in
|
||||||
|
__OPTIMIZE__ in which case the compiler can successfully
|
||||||
|
allocate enough registers to load up the asm()
|
||||||
|
|
||||||
* app/composite/gimp-composite-sse2.c: Removed code that caused
|
* app/composite/gimp-composite-sse2.c: Removed code that caused
|
||||||
gcc to complain can't find a register in class `GENERAL_REGS' To
|
gcc to complain can't find a register in class `GENERAL_REGS' To
|
||||||
be revisited later.
|
be revisited later.
|
||||||
|
|
||||||
|
|
||||||
2003-09-16 Sven Neumann <sven@gimp.org>
|
2003-09-16 Sven Neumann <sven@gimp.org>
|
||||||
|
|
||||||
* app/composite/gimp-composite-generic.c
|
* app/composite/gimp-composite-generic.c
|
||||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -211,7 +211,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -978,7 +978,7 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
|
|||||||
|
|
||||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||||
{
|
{
|
||||||
asm volatile ("movq %0,%%mm2\n"
|
asm volatile ("movq %1,%%mm2\n"
|
||||||
"\tmovq %%mm2,%%mm1\n"
|
"\tmovq %%mm2,%%mm1\n"
|
||||||
"\tpunpcklbw %%mm0,%%mm1\n"
|
"\tpunpcklbw %%mm0,%%mm1\n"
|
||||||
"\tmovq %%mm3,%%mm5\n"
|
"\tmovq %%mm3,%%mm5\n"
|
||||||
@ -993,28 +993,28 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
|
|||||||
|
|
||||||
"\tpackuswb %%mm4,%%mm1\n"
|
"\tpackuswb %%mm4,%%mm1\n"
|
||||||
|
|
||||||
"\tmovq %%mm1,%1\n"
|
"\tmovq %%mm1,%0\n"
|
||||||
: /* empty */
|
: "=m" (*op.D)
|
||||||
: "m" (*op.A), "m" (*op.D)
|
: "m" (*op.A)
|
||||||
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||||
op.A += 8;
|
op.A += 8;
|
||||||
op.D += 8;
|
op.D += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (op.n_pixels)
|
if (op.n_pixels)
|
||||||
{
|
{
|
||||||
asm volatile ("movd %0,%%mm2\n"
|
asm volatile ("movd %1,%%mm2\n"
|
||||||
"\tmovq %%mm2,%%mm1\n"
|
"\tmovq %%mm2,%%mm1\n"
|
||||||
"\tpunpcklbw %%mm0,%%mm1\n"
|
"\tpunpcklbw %%mm0,%%mm1\n"
|
||||||
"\tmovq %%mm3,%%mm5\n"
|
"\tmovq %%mm3,%%mm5\n"
|
||||||
|
|
||||||
"\t" pmulwX(mm5,mm1,mm7) "\n"
|
"\t" pmulwX(mm5,mm1,mm7) "\n"
|
||||||
|
|
||||||
"\tpackuswb %%mm0,%%mm1\n"
|
"\tpackuswb %%mm0,%%mm1\n"
|
||||||
"\tmovd %%mm1,%1\n"
|
"\tmovd %%mm1,%0\n"
|
||||||
: /* empty */
|
: "=m" (*op.D)
|
||||||
: "m" (*op.A), "m" (*op.D)
|
: "m" (*op.A)
|
||||||
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||||
}
|
}
|
||||||
|
|
||||||
asm("emms");
|
asm("emms");
|
||||||
@ -1149,8 +1149,8 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
|
|||||||
|
|
||||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||||
{
|
{
|
||||||
asm volatile (" movq %0,%%mm2\n"
|
asm volatile (" movq %1,%%mm2\n"
|
||||||
"\tmovq %1,%%mm3\n"
|
"\tmovq %2,%%mm3\n"
|
||||||
|
|
||||||
"\tmovq %%mm2,%%mm4\n"
|
"\tmovq %%mm2,%%mm4\n"
|
||||||
"\tpsubusb %%mm3,%%mm4\n"
|
"\tpsubusb %%mm3,%%mm4\n"
|
||||||
@ -1162,9 +1162,9 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
|
|||||||
|
|
||||||
"\tpand %%mm0,%%mm2\n"
|
"\tpand %%mm0,%%mm2\n"
|
||||||
"\tpor %%mm2,%%mm1\n"
|
"\tpor %%mm2,%%mm1\n"
|
||||||
"\tmovq %%mm1,%2\n"
|
"\tmovq %%mm1,%0\n"
|
||||||
: /* empty */
|
: "=m" (*op.D)
|
||||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
: "m" (*op.A), "m" (*op.B)
|
||||||
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||||
op.A += 8;
|
op.A += 8;
|
||||||
op.B += 8;
|
op.B += 8;
|
||||||
|
@ -134,7 +134,7 @@ gimp_composite_regression_compare_contexts (char *operation, GimpCompositeContex
|
|||||||
int
|
int
|
||||||
gimp_composite_regression_comp_rgba8 (char *str, gimp_rgba8_t *rgba8A, gimp_rgba8_t *rgba8B, gimp_rgba8_t *expected, gimp_rgba8_t *got, u_long length)
|
gimp_composite_regression_comp_rgba8 (char *str, gimp_rgba8_t *rgba8A, gimp_rgba8_t *rgba8B, gimp_rgba8_t *expected, gimp_rgba8_t *got, u_long length)
|
||||||
{
|
{
|
||||||
int i;
|
u_long i;
|
||||||
int failed;
|
int failed;
|
||||||
int fail_count;
|
int fail_count;
|
||||||
|
|
||||||
@ -223,12 +223,12 @@ gimp_composite_regression_timer_report (char *name, double t1, double t2)
|
|||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
gimp_composite_regression_time_function (int iterations, void (*func)(), GimpCompositeContext *ctx)
|
gimp_composite_regression_time_function (u_long iterations, void (*func)(), GimpCompositeContext *ctx)
|
||||||
{
|
{
|
||||||
struct timeval t0;
|
struct timeval t0;
|
||||||
struct timeval t1;
|
struct timeval t1;
|
||||||
struct timeval tv_elapsed;
|
struct timeval tv_elapsed;
|
||||||
int i;
|
u_long i;
|
||||||
|
|
||||||
gettimeofday(&t0, NULL);
|
gettimeofday(&t0, NULL);
|
||||||
for (i = 0; i < iterations; i++) { (*func)(ctx); }
|
for (i = 0; i < iterations; i++) { (*func)(ctx); }
|
||||||
@ -239,10 +239,10 @@ gimp_composite_regression_time_function (int iterations, void (*func)(), GimpCom
|
|||||||
}
|
}
|
||||||
|
|
||||||
gimp_rgba8_t *
|
gimp_rgba8_t *
|
||||||
gimp_composite_regression_random_rgba8 (unsigned long n_pixels)
|
gimp_composite_regression_random_rgba8 (u_long n_pixels)
|
||||||
{
|
{
|
||||||
gimp_rgba8_t *rgba8;
|
gimp_rgba8_t *rgba8;
|
||||||
int i;
|
u_long i;
|
||||||
|
|
||||||
if ((rgba8 = (gimp_rgba8_t *) calloc (sizeof(gimp_rgba8_t), n_pixels))) {
|
if ((rgba8 = (gimp_rgba8_t *) calloc (sizeof(gimp_rgba8_t), n_pixels))) {
|
||||||
for (i = 0; i < n_pixels; i++) {
|
for (i = 0; i < n_pixels; i++) {
|
||||||
@ -257,11 +257,11 @@ gimp_composite_regression_random_rgba8 (unsigned long n_pixels)
|
|||||||
}
|
}
|
||||||
|
|
||||||
gimp_rgba8_t *
|
gimp_rgba8_t *
|
||||||
gimp_composite_regression_fixed_rgba8 (unsigned long n_pixels)
|
gimp_composite_regression_fixed_rgba8 (u_long n_pixels)
|
||||||
{
|
{
|
||||||
gimp_rgba8_t *rgba8;
|
gimp_rgba8_t *rgba8;
|
||||||
int i;
|
u_long i;
|
||||||
int v;
|
u_long v;
|
||||||
|
|
||||||
if ((rgba8 = (gimp_rgba8_t *) calloc(sizeof(gimp_rgba8_t), n_pixels))) {
|
if ((rgba8 = (gimp_rgba8_t *) calloc(sizeof(gimp_rgba8_t), n_pixels))) {
|
||||||
for (i = 0; i < n_pixels; i++) {
|
for (i = 0; i < n_pixels; i++) {
|
||||||
|
@ -85,7 +85,7 @@ typedef struct
|
|||||||
} gimp_rgba32_t;
|
} gimp_rgba32_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern double gimp_composite_regression_time_function (int, void (*)(), GimpCompositeContext *);
|
extern double gimp_composite_regression_time_function (u_long, void (*)(), GimpCompositeContext *);
|
||||||
extern int gimp_composite_regression_comp_rgba8 (char *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, u_long);
|
extern int gimp_composite_regression_comp_rgba8 (char *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, u_long);
|
||||||
extern int gimp_composite_regression_comp_va8 (char *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, u_long);
|
extern int gimp_composite_regression_comp_va8 (char *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, u_long);
|
||||||
extern int gimp_composite_regression_compare_contexts (char *, GimpCompositeContext *, GimpCompositeContext *);
|
extern int gimp_composite_regression_compare_contexts (char *, GimpCompositeContext *, GimpCompositeContext *);
|
||||||
@ -94,8 +94,8 @@ extern void gimp_composite_regression_print_rgba8 (gimp_rgba8_t *);
|
|||||||
extern void gimp_composite_regression_print_va8 (gimp_va8_t *);
|
extern void gimp_composite_regression_print_va8 (gimp_va8_t *);
|
||||||
extern void gimp_composite_regression_timer_report (char *, double, double);
|
extern void gimp_composite_regression_timer_report (char *, double, double);
|
||||||
|
|
||||||
extern gimp_rgba8_t *gimp_composite_regression_random_rgba8 (unsigned long);
|
extern gimp_rgba8_t *gimp_composite_regression_random_rgba8 (u_long);
|
||||||
extern gimp_rgba8_t *gimp_composite_regression_fixed_rgba8 (unsigned long);
|
extern gimp_rgba8_t *gimp_composite_regression_fixed_rgba8 (u_long);
|
||||||
extern GimpCompositeContext *gimp_composite_context_init (GimpCompositeContext *,
|
extern GimpCompositeContext *gimp_composite_context_init (GimpCompositeContext *,
|
||||||
GimpCompositeOperation,
|
GimpCompositeOperation,
|
||||||
GimpPixelFormat,
|
GimpPixelFormat,
|
||||||
|
@ -211,7 +211,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -151,7 +151,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -685,12 +685,13 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
|||||||
{
|
{
|
||||||
GimpCompositeContext op = *_op;
|
GimpCompositeContext op = *_op;
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
/*
|
||||||
* Inhale one whole i686 cache line at once. 64 bytes, 16 rgba8 pixels, 4 128 bit xmm registers.
|
* Inhale one whole i686 cache line at once. 64 bytes, 16 rgba8
|
||||||
|
* pixels, 4 128 bit xmm registers.
|
||||||
*/
|
*/
|
||||||
for (; op.n_pixels >= 16; op.n_pixels -= 16)
|
for (; op.n_pixels >= 16; op.n_pixels -= 16)
|
||||||
{
|
{
|
||||||
|
#ifdef __OPTIMIZE__
|
||||||
asm volatile (" movdqu %0,%%xmm0\n"
|
asm volatile (" movdqu %0,%%xmm0\n"
|
||||||
"\tmovdqu %1,%%xmm1\n"
|
"\tmovdqu %1,%%xmm1\n"
|
||||||
"\tmovdqu %2,%%xmm2\n"
|
"\tmovdqu %2,%%xmm2\n"
|
||||||
@ -715,10 +716,51 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
|||||||
: /* empty */
|
: /* empty */
|
||||||
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||||
);
|
);
|
||||||
|
#else
|
||||||
|
asm volatile (" movdqu %0,%%xmm0\n"
|
||||||
|
"\tmovdqu %1,%%xmm1\n"
|
||||||
|
"\tmovdqu %2,%%xmm2\n"
|
||||||
|
"\tmovdqu %3,%%xmm3\n"
|
||||||
|
: "+m" (op.A[0]), "+m" (op.B[0]),
|
||||||
|
"+m" (op.A[1]), "+m" (op.B[1])
|
||||||
|
: /* empty */
|
||||||
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||||
|
);
|
||||||
|
|
||||||
|
asm volatile ("\tmovdqu %4,%%xmm4\n"
|
||||||
|
"\tmovdqu %5,%%xmm5\n"
|
||||||
|
"\tmovdqu %6,%%xmm6\n"
|
||||||
|
"\tmovdqu %7,%%xmm7\n"
|
||||||
|
: "+m" (op.A[2]), "+m" (op.B[2]),
|
||||||
|
"+m" (op.A[3]), "+m" (op.B[3])
|
||||||
|
: /* empty */
|
||||||
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||||
|
);
|
||||||
|
|
||||||
|
asm volatile ("\tmovdqu %%xmm0,%1\n"
|
||||||
|
"\tmovdqu %%xmm1,%0\n"
|
||||||
|
"\tmovdqu %%xmm2,%3\n"
|
||||||
|
"\tmovdqu %%xmm3,%2\n"
|
||||||
|
: "+m" (op.A[0]), "+m" (op.B[0]),
|
||||||
|
"+m" (op.A[1]), "+m" (op.B[1])
|
||||||
|
: /* empty */
|
||||||
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||||
|
);
|
||||||
|
|
||||||
|
asm volatile ("\tmovdqu %%xmm4,%5\n"
|
||||||
|
"\tmovdqu %%xmm5,%4\n"
|
||||||
|
"\tmovdqu %%xmm6,%7\n"
|
||||||
|
"\tmovdqu %%xmm7,%6\n"
|
||||||
|
: "+m" (op.A[2]), "+m" (op.B[2]),
|
||||||
|
"+m" (op.A[3]), "+m" (op.B[3])
|
||||||
|
: /* empty */
|
||||||
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||||
|
);
|
||||||
|
#endif
|
||||||
op.A += 64;
|
op.A += 64;
|
||||||
op.B += 64;
|
op.B += 64;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||||
{
|
{
|
||||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
|||||||
putenv ("GIMP_COMPOSITE=0x1");
|
putenv ("GIMP_COMPOSITE=0x1");
|
||||||
|
|
||||||
iterations = 1;
|
iterations = 1;
|
||||||
n_pixels = 163921;
|
n_pixels = 1048593;
|
||||||
|
|
||||||
argv++, argc--;
|
argv++, argc--;
|
||||||
while (argc >= 2) {
|
while (argc >= 2) {
|
||||||
|
@ -522,7 +522,7 @@ op.add_option('-t', '--test', action='store_true', dest='test',
|
|||||||
help='generate regression testing code')
|
help='generate regression testing code')
|
||||||
op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=1,
|
op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=1,
|
||||||
help='number of iterations in regression tests')
|
help='number of iterations in regression tests')
|
||||||
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=64*2049+16*2049+1,
|
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=128*8192+16+1,
|
||||||
help='number of pixels in each regression test iteration')
|
help='number of pixels in each regression test iteration')
|
||||||
op.add_option('-r', '--requires', action='append', type='string', dest='requires', default=[],
|
op.add_option('-r', '--requires', action='append', type='string', dest='requires', default=[],
|
||||||
help='cpp #if conditionals')
|
help='cpp #if conditionals')
|
||||||
|
Reference in New Issue
Block a user