Regenerated.

* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated.

* app/composite/gimp-composite-regression.[ch]: Ensure all ints
  are now unsigned longs.

* app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2):
  Enclosed the troublesome (rather dense) asm construct in
  #ifdef __OPTIMIZE__, in which case the compiler can successfully
  allocate enough registers to load up the asm().
This commit is contained in:
Helvetix Victorinox
2003-09-17 05:43:28 +00:00
parent d9996103cc
commit 9c64fde3d1
12 changed files with 110 additions and 41 deletions

View File

@ -1,3 +1,19 @@
2003-09-16 Helvetix Victorinox <helvetix@gimp.org>
* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated.
* app/composite/gimp-composite-regression.[ch]: Ensure all ints
are now unsigned longs.
* app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2):
Enclosed the troublesome (rather dense) asm construct in
#ifdef __OPTIMIZE__, in which case the compiler can successfully
allocate enough registers to load up the asm().
* app/composite/gimp-composite-sse2.c: Removed code that caused
gcc to complain "can't find a register in class `GENERAL_REGS'".
To be revisited later.
2003-09-17 Seth Burgess <sjburges@gimp.org>
* plug-ins/common/sparkle.c: use the rowstride of a pixel region
@ -71,10 +87,21 @@
2003-09-16 Helvetix Victorinox <helvetix@gimp.org>
* app/composite/gimp-composite-{mmx,sse,sse2,altivec,vis}.c: Regenerated.
* app/composite/gimp-composite-regression.[ch]: Ensure all ints
are now unsigned longs.
* app/composite/gimp-composite-sse2.c (gimp_composite_addition_rgba8_rgba8_rgba8_sse2):
Enclosed the troublesome (rather dense) asm construct in
#ifdef __OPTIMIZE__, in which case the compiler can successfully
allocate enough registers to load up the asm().
* app/composite/gimp-composite-sse2.c: Removed code that caused
gcc to complain "can't find a register in class `GENERAL_REGS'".
To be revisited later.
2003-09-16 Sven Neumann <sven@gimp.org>
* app/composite/gimp-composite-generic.c

View File

@ -71,7 +71,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -71,7 +71,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -211,7 +211,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -978,7 +978,7 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
for (; op.n_pixels >= 2; op.n_pixels -= 2)
{
asm volatile ("movq %0,%%mm2\n"
asm volatile ("movq %1,%%mm2\n"
"\tmovq %%mm2,%%mm1\n"
"\tpunpcklbw %%mm0,%%mm1\n"
"\tmovq %%mm3,%%mm5\n"
@ -993,28 +993,28 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
"\tpackuswb %%mm4,%%mm1\n"
"\tmovq %%mm1,%1\n"
: /* empty */
: "m" (*op.A), "m" (*op.D)
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
"\tmovq %%mm1,%0\n"
: "=m" (*op.D)
: "m" (*op.A)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
op.A += 8;
op.D += 8;
}
if (op.n_pixels)
{
asm volatile ("movd %0,%%mm2\n"
"\tmovq %%mm2,%%mm1\n"
"\tpunpcklbw %%mm0,%%mm1\n"
"\tmovq %%mm3,%%mm5\n"
asm volatile ("movd %1,%%mm2\n"
"\tmovq %%mm2,%%mm1\n"
"\tpunpcklbw %%mm0,%%mm1\n"
"\tmovq %%mm3,%%mm5\n"
"\t" pmulwX(mm5,mm1,mm7) "\n"
"\t" pmulwX(mm5,mm1,mm7) "\n"
"\tpackuswb %%mm0,%%mm1\n"
"\tmovd %%mm1,%1\n"
: /* empty */
: "m" (*op.A), "m" (*op.D)
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
"\tpackuswb %%mm0,%%mm1\n"
"\tmovd %%mm1,%0\n"
: "=m" (*op.D)
: "m" (*op.A)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
}
asm("emms");
@ -1149,8 +1149,8 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
for (; op.n_pixels >= 2; op.n_pixels -= 2)
{
asm volatile (" movq %0,%%mm2\n"
"\tmovq %1,%%mm3\n"
asm volatile (" movq %1,%%mm2\n"
"\tmovq %2,%%mm3\n"
"\tmovq %%mm2,%%mm4\n"
"\tpsubusb %%mm3,%%mm4\n"
@ -1162,9 +1162,9 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
"\tpand %%mm0,%%mm2\n"
"\tpor %%mm2,%%mm1\n"
"\tmovq %%mm1,%2\n"
: /* empty */
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
"\tmovq %%mm1,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
op.A += 8;
op.B += 8;

View File

@ -134,7 +134,7 @@ gimp_composite_regression_compare_contexts (char *operation, GimpCompositeContex
int
gimp_composite_regression_comp_rgba8 (char *str, gimp_rgba8_t *rgba8A, gimp_rgba8_t *rgba8B, gimp_rgba8_t *expected, gimp_rgba8_t *got, u_long length)
{
int i;
u_long i;
int failed;
int fail_count;
@ -223,12 +223,12 @@ gimp_composite_regression_timer_report (char *name, double t1, double t2)
}
double
gimp_composite_regression_time_function (int iterations, void (*func)(), GimpCompositeContext *ctx)
gimp_composite_regression_time_function (u_long iterations, void (*func)(), GimpCompositeContext *ctx)
{
struct timeval t0;
struct timeval t1;
struct timeval tv_elapsed;
int i;
u_long i;
gettimeofday(&t0, NULL);
for (i = 0; i < iterations; i++) { (*func)(ctx); }
@ -239,10 +239,10 @@ gimp_composite_regression_time_function (int iterations, void (*func)(), GimpCom
}
gimp_rgba8_t *
gimp_composite_regression_random_rgba8 (unsigned long n_pixels)
gimp_composite_regression_random_rgba8 (u_long n_pixels)
{
gimp_rgba8_t *rgba8;
int i;
u_long i;
if ((rgba8 = (gimp_rgba8_t *) calloc (sizeof(gimp_rgba8_t), n_pixels))) {
for (i = 0; i < n_pixels; i++) {
@ -257,11 +257,11 @@ gimp_composite_regression_random_rgba8 (unsigned long n_pixels)
}
gimp_rgba8_t *
gimp_composite_regression_fixed_rgba8 (unsigned long n_pixels)
gimp_composite_regression_fixed_rgba8 (u_long n_pixels)
{
gimp_rgba8_t *rgba8;
int i;
int v;
u_long i;
u_long v;
if ((rgba8 = (gimp_rgba8_t *) calloc(sizeof(gimp_rgba8_t), n_pixels))) {
for (i = 0; i < n_pixels; i++) {

View File

@ -85,7 +85,7 @@ typedef struct
} gimp_rgba32_t;
#endif
extern double gimp_composite_regression_time_function (int, void (*)(), GimpCompositeContext *);
extern double gimp_composite_regression_time_function (u_long, void (*)(), GimpCompositeContext *);
extern int gimp_composite_regression_comp_rgba8 (char *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, gimp_rgba8_t *, u_long);
extern int gimp_composite_regression_comp_va8 (char *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, gimp_va8_t *, u_long);
extern int gimp_composite_regression_compare_contexts (char *, GimpCompositeContext *, GimpCompositeContext *);
@ -94,8 +94,8 @@ extern void gimp_composite_regression_print_rgba8 (gimp_rgba8_t *);
extern void gimp_composite_regression_print_va8 (gimp_va8_t *);
extern void gimp_composite_regression_timer_report (char *, double, double);
extern gimp_rgba8_t *gimp_composite_regression_random_rgba8 (unsigned long);
extern gimp_rgba8_t *gimp_composite_regression_fixed_rgba8 (unsigned long);
extern gimp_rgba8_t *gimp_composite_regression_random_rgba8 (u_long);
extern gimp_rgba8_t *gimp_composite_regression_fixed_rgba8 (u_long);
extern GimpCompositeContext *gimp_composite_context_init (GimpCompositeContext *,
GimpCompositeOperation,
GimpPixelFormat,

View File

@ -211,7 +211,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -151,7 +151,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -685,12 +685,13 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
GimpCompositeContext op = *_op;
#if 0
/*
* Inhale one whole i686 cache line at once. 64 bytes, 16 rgba8 pixels, 4 128 bit xmm registers.
* Inhale one whole i686 cache line at once. 64 bytes, 16 rgba8
* pixels, 4 128 bit xmm registers.
*/
for (; op.n_pixels >= 16; op.n_pixels -= 16)
{
#ifdef __OPTIMIZE__
asm volatile (" movdqu %0,%%xmm0\n"
"\tmovdqu %1,%%xmm1\n"
"\tmovdqu %2,%%xmm2\n"
@ -715,10 +716,51 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
#else
asm volatile (" movdqu %0,%%xmm0\n"
"\tmovdqu %1,%%xmm1\n"
"\tmovdqu %2,%%xmm2\n"
"\tmovdqu %3,%%xmm3\n"
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
asm volatile ("\tmovdqu %4,%%xmm4\n"
"\tmovdqu %5,%%xmm5\n"
"\tmovdqu %6,%%xmm6\n"
"\tmovdqu %7,%%xmm7\n"
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
asm volatile ("\tmovdqu %%xmm0,%1\n"
"\tmovdqu %%xmm1,%0\n"
"\tmovdqu %%xmm2,%3\n"
"\tmovdqu %%xmm3,%2\n"
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
asm volatile ("\tmovdqu %%xmm4,%5\n"
"\tmovdqu %%xmm5,%4\n"
"\tmovdqu %%xmm6,%7\n"
"\tmovdqu %%xmm7,%6\n"
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
#endif
op.A += 64;
op.B += 64;
}
#endif
for (; op.n_pixels >= 4; op.n_pixels -= 4)
{

View File

@ -71,7 +71,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 1;
n_pixels = 163921;
n_pixels = 1048593;
argv++, argc--;
while (argc >= 2) {

View File

@ -522,7 +522,7 @@ op.add_option('-t', '--test', action='store_true', dest='test',
help='generate regression testing code')
op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=1,
help='number of iterations in regression tests')
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=64*2049+16*2049+1,
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=128*8192+16+1,
help='number of pixels in each regression test iteration')
op.add_option('-r', '--requires', action='append', type='string', dest='requires', default=[],
help='cpp #if conditionals')