MMX paint_funcs Should autodetect whether the assembler handles MMX and

MMX paint_funcs
Should autodetect whether the assembler handles MMX and whether the C
compiler allows gcc-like __attribute function specifications.
Should detect MMX at run-time.
This commit is contained in:
David Monniaux
2001-02-10 21:14:54 +00:00
parent f9e3411237
commit 46c62c8feb
10 changed files with 1974 additions and 7 deletions

View File

@ -16,6 +16,7 @@
#undef ENABLE_MP #undef ENABLE_MP
#undef ENABLE_NLS #undef ENABLE_NLS
#undef HAVE_ASM_MMX
#undef HAVE_CATGETS #undef HAVE_CATGETS
#undef HAVE_DIRENT_H #undef HAVE_DIRENT_H
#undef HAVE_DOPRNT #undef HAVE_DOPRNT
@ -49,7 +50,6 @@
#undef SRAND_FUNC #undef SRAND_FUNC
#undef USE_PTHREADS #undef USE_PTHREADS
/* Leave that blank line there!! Autoheader needs it. /* Leave that blank line there!! Autoheader needs it.
If you're adding to this file, keep in mind: If you're adding to this file, keep in mind:

View File

@ -314,7 +314,8 @@ gimp_SOURCES = \
marching_ants.h \ marching_ants.h \
pixmaps.h \ pixmaps.h \
pixmaps2.h \ pixmaps2.h \
wilber.h wilber.h \
paint_funcs_simd.S
EXTRA_DIST = \ EXTRA_DIST = \
makefile.mingw \ makefile.mingw \
@ -322,7 +323,8 @@ EXTRA_DIST = \
makefile.msc \ makefile.msc \
gimp.rc \ gimp.rc \
gimp.sym \ gimp.sym \
wilber.ico wilber.ico \
arch/i386/mmx/paint_funcs_mmx.S
gimp_LDFLAGS = -export-dynamic -export-symbols $(srcdir)/gimp.sym gimp_LDFLAGS = -export-dynamic -export-symbols $(srcdir)/gimp.sym

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,360 @@
/*
MMX code to supplement some functions in paint_funcs.c
for the Gimp.
Copyright (C) 1999, 2001 David Monniaux
*/
.text
.align 4
/*
 * unsigned long intel_cpu_features(void)
 *
 * Returns the EDX feature word of CPUID leaf 1, or 0 when the CPUID
 * instruction is not available.  Availability is probed by trying to
 * flip the ID flag (bit 21, mask 0x200000) in EFLAGS and checking that
 * the change is still there when the flags are read back.
 * ebx is saved and restored; eax and edx are overwritten, and cpuid may
 * overwrite ecx.  The flipped ID flag is left in EFLAGS on return.
 */
.globl intel_cpu_features
intel_cpu_features:
pushl %ebx /* cpuid writes ebx, so keep the caller value */
pushfl
popl %eax /* eax = current EFLAGS */
xor $ 0x200000, %eax /* flip the ID flag */
pushl %eax
popfl /* try to install the modified flags */
pushfl
popl %edx /* edx = EFLAGS as actually stored */
xor %eax, %edx /* bit 21 set here means the flip did not stick */
xor %eax, %eax /* default return value: no features */
test $ 0x200000, %edx
jnz .intel_cpu_features_end /* no CPUID available: return 0 */
movl $ 1, %eax /* request leaf 1 */
cpuid
movl %edx, %eax /* return the EDX feature flags */
.intel_cpu_features_end:
popl %ebx
ret
/* Constant 64-bit operands used by the pixel routines below. */
/* Selects the odd bytes: the alpha byte of each of four 2-byte pixels. */
.alpha_mask_1a: .int 0xFF00FF00, 0xFF00FF00
/* 0x80 in every 16-bit lane: bias added to each byte product before scaling. */
.mult_shift: .int 0x00800080, 0x00800080
/* Selects byte 3 of each dword: the alpha byte of each of two 4-byte pixels. */
.alpha_mask_3a: .int 0xFF000000, 0xFF000000
/*
 * Generator for the two MMX entry points of one pixel operation.
 *
 * Macro arguments:
 *   1: operation name; the symbols NAME_pixels_3a_3a (4-byte pixels, alpha
 *      in the last byte) and NAME_pixels_1a_1a (2-byte pixels, alpha in the
 *      last byte) are emitted
 *   2: set-up code, run once before the loop of BOTH entry points
 *      (may be empty)
 *   3: kernel; reads mm2 = src1 data, mm3 = src2 data, mm0 = alpha mask
 *      and leaves the result in mm1
 *   4: tear-down code, run once before emms (may be empty)
 *
 * Register interface of the generated functions (regparm 3 on the C side):
 *   eax = src1, edx = src2, ecx = pixel count, first stack slot = dst.
 * edi and ebx are saved and restored.
 *
 * Fixes relative to the first version: the 2-byte-pixel entry point now
 * runs the set-up code as well, and its two-pixel tail stores its result.
 */
define(`MMX_PIXEL_OP_3A_1A', `
.globl $1_pixels_3a_3a
.align 16
$1_pixels_3a_3a:
pushl %edi
pushl %ebx
movl 12(%esp), %edi /* dst sits above two saved registers and the return address */
movq .alpha_mask_3a, %mm0
$2
subl $ 2, %ecx
jl .$1_pixels_3a_3a_last /* fewer than two pixels: skip the quadword loop */
movl $ 8, %ebx /* pointer step: one quadword holds two pixels */
.$1_pixels_3a_3a_loop:
movq (%eax), %mm2
movq (%edx), %mm3
$3
movq %mm1, (%edi)
addl %ebx, %eax
addl %ebx, %edx
addl %ebx, %edi
subl $ 2, %ecx
jge .$1_pixels_3a_3a_loop
.$1_pixels_3a_3a_last:
test $ 1, %ecx /* ecx is remaining minus 2, so bit 0 tells if one pixel is left */
jz .$1_pixels_3a_3a_end
movd (%eax), %mm2
movd (%edx), %mm3
$3
movd %mm1, (%edi)
.$1_pixels_3a_3a_end:
$4
emms
popl %ebx
popl %edi
ret
.globl $1_pixels_1a_1a
.align 16
$1_pixels_1a_1a:
pushl %edi
pushl %ebx
movl 12(%esp), %edi /* dst sits above two saved registers and the return address */
movq .alpha_mask_1a, %mm0
$2
subl $ 4, %ecx
jl .$1_pixels_1a_1a_last3 /* fewer than four pixels: skip the quadword loop */
movl $ 8, %ebx /* pointer step: one quadword holds four pixels */
.$1_pixels_1a_1a_loop:
movq (%eax), %mm2
movq (%edx), %mm3
$3
movq %mm1, (%edi)
addl %ebx, %eax
addl %ebx, %edx
addl %ebx, %edi
subl $ 4, %ecx
jge .$1_pixels_1a_1a_loop
.$1_pixels_1a_1a_last3:
/* ecx is remaining minus 4, so its low two bits equal the remaining count */
test $ 2, %ecx
jz .$1_pixels_1a_1a_last1
movd (%eax), %mm2
movd (%edx), %mm3
$3
movd %mm1, (%edi)
addl $ 4, %eax
addl $ 4, %edx
addl $ 4, %edi
.$1_pixels_1a_1a_last1:
test $ 1, %ecx
jz .$1_pixels_1a_1a_end
/* single pixel: go through bx so that only two bytes are read and written */
movw (%eax), %bx
movd %ebx, %mm2
movw (%edx), %bx
movd %ebx, %mm3
$3
movd %mm1, %ebx
movw %bx, (%edi)
.$1_pixels_1a_1a_end:
$4
emms
popl %ebx
popl %edi
ret')
/* min(a,b) = a - max(a-b, 0) */
/*
 * add: colour bytes = src1 + src2 with unsigned saturation,
 * alpha bytes = min of the two alpha bytes.
 */
MMX_PIXEL_OP_3A_1A(`add', `', `
movq %mm2, %mm4
paddusb %mm3, %mm4 /* mm4 = saturated sum of every byte */
movq %mm0, %mm1
pandn %mm4, %mm1 /* mm1 = sum restricted to the non-alpha bytes */
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1', `')
/*
 * substract (the spelling matches the symbol names used on the C side):
 * colour bytes = src1 - src2 clamped at 0,
 * alpha bytes = min of the two alpha bytes.
 */
MMX_PIXEL_OP_3A_1A(`substract', `', `
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
movq %mm0, %mm1
pandn %mm4, %mm1 /* mm1 = clamped difference in the non-alpha bytes */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1', `')
/*
 * difference: colour bytes = |src1 - src2|,
 * alpha bytes = min of the two alpha bytes.
 *
 * The minimum is a - max(a - b, 0), so it has to be taken while mm4 still
 * holds max(a - b, 0).  Subtracting the absolute difference instead, as
 * the first version did, gives 2a - b whenever a is smaller than b.
 */
MMX_PIXEL_OP_3A_1A(`difference', `', `
movq %mm2, %mm4
movq %mm3, %mm5
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
psubusb %mm2, %mm5 /* mm5 = max(src2 - src1, 0) */
psubb %mm4, %mm2 /* mm2 = min(src1, src2), before mm4 is reused */
paddb %mm5, %mm4 /* mm4 = |src1 - src2| */
movq %mm0, %mm1
pandn %mm4, %mm1 /* mm1 = absolute difference in the non-alpha bytes */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1', `')
/*
 * multiply: colour bytes = src1 * src2 / 255 (rounded),
 * alpha bytes = min of the two alpha bytes.
 *
 * Set-up: mm7 = 0x0080 in every word lane, mm6 = 0 for byte unpacking.
 * Each product is scaled as (t + (t >> 8)) >> 8 with t = a * b + 0x80.
 */
MMX_PIXEL_OP_3A_1A(`multiply', `
movq .mult_shift, %mm7
pxor %mm6, %mm6',`
movq %mm2, %mm1
punpcklbw %mm6, %mm1 /* low four bytes of src1 widened to words */
movq %mm3, %mm5
punpcklbw %mm6, %mm5 /* low four bytes of src2 widened to words */
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1 /* mm1 = scaled products of the low half */
movq %mm2, %mm4
punpckhbw %mm6, %mm4 /* high four bytes of src1 widened to words */
movq %mm3, %mm5
punpckhbw %mm6, %mm5 /* high four bytes of src2 widened to words */
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4 /* mm4 = scaled products of the high half */
packuswb %mm4, %mm1 /* back to eight bytes: low half then high half */
movq %mm0, %mm4
pandn %mm1, %mm4 /* products restricted to the non-alpha bytes */
movq %mm4, %mm1
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1', `')
/* Could be perhaps more optimized */
/*
 * darken: every byte, alpha included, becomes min(src1, src2),
 * so the alpha mask in mm0 is not needed here.
 */
MMX_PIXEL_OP_3A_1A(`darken', `', `
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
movq %mm2, %mm1', `')
/*
 * lighten: colour bytes = max(src1, src2),
 * alpha bytes = min of the two alpha bytes.
 */
MMX_PIXEL_OP_3A_1A(`lighten', `', `
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
paddb %mm4, %mm3 /* mm3 = max(src1, src2) */
movq %mm0, %mm1
pandn %mm3, %mm1 /* maximum restricted to the non-alpha bytes */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1', `')
/*
 * screen: colour bytes = 255 - (255 - src1) * (255 - src2) / 255,
 * alpha bytes = min of the two alpha bytes.
 *
 * Set-up: mm7 = 0x0080 in every word lane, mm6 = 0 for byte unpacking.
 * The alpha minimum is formed on the complements:
 *   min(a, b) = 255 - max(255 - a, 255 - b).
 * The first version copied mm2 into mm4 at the start of that step, with
 * the operands the wrong way round, so leftovers of the word products were
 * used in place of 255 - src1 and the result carried the src2 alpha.
 */
MMX_PIXEL_OP_3A_1A(`screen', `
movq .mult_shift, %mm7
pxor %mm6, %mm6',`
pcmpeqb %mm4, %mm4
psubb %mm2, %mm4 /* mm4 = 255 - src1 */
pcmpeqb %mm5, %mm5
psubb %mm3, %mm5 /* mm5 = 255 - src2 */
movq %mm4, %mm1
punpcklbw %mm6, %mm1
movq %mm5, %mm3
punpcklbw %mm6, %mm3
pmullw %mm3, %mm1
paddw %mm7, %mm1
movq %mm1, %mm3
psrlw $ 8, %mm3
paddw %mm3, %mm1
psrlw $ 8, %mm1 /* mm1 = scaled products of the low half */
movq %mm4, %mm2
punpckhbw %mm6, %mm2
movq %mm5, %mm3
punpckhbw %mm6, %mm3
pmullw %mm3, %mm2
paddw %mm7, %mm2
movq %mm2, %mm3
psrlw $ 8, %mm3
paddw %mm3, %mm2
psrlw $ 8, %mm2 /* mm2 = scaled products of the high half */
packuswb %mm2, %mm1
pcmpeqb %mm3, %mm3
psubb %mm1, %mm3 /* mm3 = 255 - product */
movq %mm0, %mm1
pandn %mm3, %mm1 /* screen result restricted to the non-alpha bytes */
movq %mm4, %mm2 /* mm2 = 255 - src1 again (mm4 still holds it) */
psubusb %mm5, %mm2 /* max((255 - src1) - (255 - src2), 0) */
paddb %mm2, %mm5 /* mm5 = max(255 - src1, 255 - src2) */
pcmpeqb %mm3, %mm3
psubb %mm5, %mm3 /* mm3 = min(src1, src2) */
pand %mm0, %mm3 /* keep the alpha bytes only */
por %mm3, %mm1', `')
/* 0x00FF in every 16-bit lane: the value 255 for the word arithmetic of overlay. */
.lower_ff: .int 0x00FF00FF, 0x00FF00FF
/*
 * overlay: the kernel is long, so it is kept once in op_overlay and each
 * expansion of the kernel slot is just a call to it.
 * Set-up: mm7 = 0x0080 in every word lane, mm6 = 0 for byte unpacking.
 */
MMX_PIXEL_OP_3A_1A(`overlay', `
movq .mult_shift, %mm7
pxor %mm6, %mm6 ',
`call op_overlay', `')
/*
 * op_overlay: kernel shared by both overlay entry points.
 *
 * In:  mm2 = src1 bytes, mm3 = src2 bytes, mm0 = alpha mask,
 *      mm6 = 0, mm7 = 0x0080 per word lane.
 * Out: mm1 = result.  mm4 and mm5 are scratch; mm2 ends up holding the
 *      masked alpha minimum.  Eight bytes of stack are used temporarily.
 *
 * With mul(x, y) = (t + (t >> 8)) >> 8 for t = x * y + 0x80, each colour
 * byte is computed as
 *     m = mul(a, b)
 *     s = mul(255 - a, 255 - b)
 *     result = mul(255 - s - m, a) + m
 * where a comes from src1 and b from src2.  Alpha bytes receive
 * min(a, b).  The low and the high four bytes are processed one after the
 * other with identical code.
 */
op_overlay:
/* low half: m = mul(a, b) in mm1 */
movq %mm2, %mm1
punpcklbw %mm6, %mm1
movq %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
/* low half: s = mul(255 - a, 255 - b) in mm4 */
pcmpeqb %mm4, %mm4
psubb %mm2, %mm4
punpcklbw %mm6, %mm4
pcmpeqb %mm5, %mm5
psubb %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
/* low half: mm5 = mul(255 - s - m, a) + m */
movq .lower_ff, %mm5
psubw %mm4, %mm5
psubw %mm1, %mm5
movq %mm2, %mm4
punpcklbw %mm6, %mm4
pmullw %mm4, %mm5
paddw %mm7, %mm5
movq %mm5, %mm4
psrlw $ 8, %mm4
paddw %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm1, %mm5
/* park the low-half words on the stack while the high half is computed */
subl $ 8, %esp
movq %mm5, (%esp)
/* high half: m = mul(a, b) in mm1 */
movq %mm2, %mm1
punpckhbw %mm6, %mm1
movq %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
/* high half: s = mul(255 - a, 255 - b) in mm4 */
pcmpeqb %mm4, %mm4
psubb %mm2, %mm4
punpckhbw %mm6, %mm4
pcmpeqb %mm5, %mm5
psubb %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
/* high half: mm5 = mul(255 - s - m, a) + m */
movq .lower_ff, %mm5
psubw %mm4, %mm5
psubw %mm1, %mm5
movq %mm2, %mm4
punpckhbw %mm6, %mm4
pmullw %mm4, %mm5
paddw %mm7, %mm5
movq %mm5, %mm4
psrlw $ 8, %mm4
paddw %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm1, %mm5
/* fetch the low half back and pack both halves into eight bytes */
movq (%esp), %mm4
addl $ 8, %esp
packuswb %mm5, %mm4
movq %mm0, %mm1
pandn %mm4, %mm1 /* overlay result restricted to the non-alpha bytes */
movq %mm2, %mm4
psubusb %mm3, %mm4 /* mm4 = max(src1 - src2, 0) */
psubb %mm4, %mm2 /* mm2 = min(src1, src2) */
pand %mm0, %mm2 /* keep the alpha bytes only */
por %mm2, %mm1
ret

View File

@ -78,6 +78,12 @@ gboolean use_debug_handler = FALSE;
gboolean console_messages = FALSE; gboolean console_messages = FALSE;
gboolean restore_session = FALSE; gboolean restore_session = FALSE;
gboolean double_speed = FALSE; gboolean double_speed = FALSE;
/* Run-time switch for the MMX code paths; stays FALSE unless the CPU
 * reports MMX support at start-up. */
gboolean use_mmx = FALSE;
/* TODO: this should probably go into a header file */
/* The prototype is guarded by the same symbol as the call in main(), so
 * the call is never made without a declaration in scope. */
#ifdef HAVE_ASM_MMX
unsigned long intel_cpu_features(void);
#endif
MessageHandlerType message_handler = CONSOLE; MessageHandlerType message_handler = CONSOLE;
@ -149,6 +155,11 @@ main (int argc,
use_shm = TRUE; use_shm = TRUE;
#endif #endif
#ifdef HAVE_ASM_MMX
use_mmx = (intel_cpu_features() & (1 << 23)) ? 1 : 0;
fprintf(stderr, "MMX : %s\n", use_mmx ? "yes" : "no");
#endif
batch_cmds = g_new (char *, argc); batch_cmds = g_new (char *, argc);
batch_cmds[0] = NULL; batch_cmds[0] = NULL;

View File

@ -0,0 +1,5 @@
/* Wrapper listed in gimp_SOURCES: pulls in the i386 MMX routines only when
 * configure defined HAVE_ASM_MMX; otherwise this file assembles to nothing. */
#include "config.h"
#ifdef HAVE_ASM_MMX
#include <arch/i386/mmx/paint_funcs_mmx.S>
#endif

View File

@ -58,7 +58,6 @@
#define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b)) #define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b))
typedef enum typedef enum
{ {
MinifyX_MinifyY, MinifyX_MinifyY,
@ -153,7 +152,41 @@ static void apply_layer_mode_replace (guchar *src1,
gboolean *affect); gboolean *affect);
static void rotate_pointers (gpointer *p, static void rotate_pointers (gpointer *p,
guint32 n); guint32 n);
/* MMX stuff */

/* Run-time switch, set in main() from the CPUID feature bits. */
extern gboolean use_mmx;

/* The assembly routines are only built when configure defined
 * HAVE_ASM_MMX, so only reference them in that case; otherwise the
 * macros below expand to nothing and the plain C loops are used. */
#ifdef HAVE_ASM_MMX
#define USE_GCC_INTEL_MMX
#endif

#ifdef USE_GCC_INTEL_MMX

/* Prototype of one assembly routine: the first three arguments travel in
 * registers (regparm(3)), dst is passed on the stack. */
#define MMX_PIXEL_OP(x) \
void \
x( \
const unsigned char *src1, \
const unsigned char *src2, \
unsigned count, \
unsigned char *dst) __attribute((regparm(3)));

/* Declares both variants of an operation: 4-byte and 2-byte pixels. */
#define MMX_PIXEL_OP_3A_1A(op) \
MMX_PIXEL_OP(op##_pixels_3a_3a) \
MMX_PIXEL_OP(op##_pixels_1a_1a)

/* Placed at the top of a *_pixels() function: when MMX is usable and both
 * sources have alpha with the same 2- or 4-byte layout, hand the whole
 * run to the assembly routine and return; otherwise fall through to the
 * C implementation that follows. */
#define USE_MMX_PIXEL_OP_3A_1A(op) \
if (use_mmx && has_alpha1 && has_alpha2) \
{ \
if (bytes1==2 && bytes2==2) \
{ \
op##_pixels_1a_1a(src1, src2, length, dest); \
return; \
} \
if (bytes1==4 && bytes2==4) \
{ \
op##_pixels_3a_3a(src1, src2, length, dest); \
return; \
} \
} \
/*fprintf(stderr, "non-MMX: %s(%d, %d, %d, %d)\n", #op, \
bytes1, bytes2, has_alpha1, has_alpha2);*/
#else
#define MMX_PIXEL_OP_3A_1A(op)
#define USE_MMX_PIXEL_OP_3A_1A(op)
#endif
void void
@ -715,6 +748,7 @@ extract_alpha_pixels (const guchar *src,
} }
} }
MMX_PIXEL_OP_3A_1A(darken)
void void
darken_pixels (const guchar *src1, darken_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -728,6 +762,8 @@ darken_pixels (const guchar *src1,
gint b, alpha; gint b, alpha;
guchar s1, s2; guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(darken)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--) while (length--)
@ -750,7 +786,7 @@ darken_pixels (const guchar *src1,
} }
} }
MMX_PIXEL_OP_3A_1A(lighten)
void void
lighten_pixels (const guchar *src1, lighten_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -764,6 +800,8 @@ lighten_pixels (const guchar *src1,
gint b, alpha; gint b, alpha;
guchar s1, s2; guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(lighten)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--) while (length--)
@ -881,6 +919,7 @@ color_only_pixels (const guchar *src1,
} }
} }
MMX_PIXEL_OP_3A_1A(multiply)
void void
multiply_pixels (const guchar *src1, multiply_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -894,6 +933,8 @@ multiply_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint tmp; gint tmp;
USE_MMX_PIXEL_OP_3A_1A(multiply)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
if (has_alpha1 && has_alpha2) if (has_alpha1 && has_alpha2)
@ -973,6 +1014,8 @@ divide_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(screen)
void void
screen_pixels (const guchar *src1, screen_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -986,6 +1029,8 @@ screen_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint tmp; gint tmp;
USE_MMX_PIXEL_OP_3A_1A(screen)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1005,6 +1050,8 @@ screen_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(overlay)
void void
overlay_pixels (const guchar *src1, overlay_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1153,6 +1200,8 @@ hardlight_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(add)
void void
add_pixels (const guchar *src1, add_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1165,6 +1214,8 @@ add_pixels (const guchar *src1,
{ {
gint alpha, b; gint alpha, b;
USE_MMX_PIXEL_OP_3A_1A(add)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1187,6 +1238,8 @@ add_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(substract)
void void
subtract_pixels (const guchar *src1, subtract_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1200,6 +1253,8 @@ subtract_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint diff; gint diff;
USE_MMX_PIXEL_OP_3A_1A(substract)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1222,6 +1277,8 @@ subtract_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(difference)
void void
difference_pixels (const guchar *src1, difference_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1235,6 +1292,8 @@ difference_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint diff; gint diff;
USE_MMX_PIXEL_OP_3A_1A(difference)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)

View File

@ -58,7 +58,6 @@
#define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b)) #define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b))
typedef enum typedef enum
{ {
MinifyX_MinifyY, MinifyX_MinifyY,
@ -153,7 +152,41 @@ static void apply_layer_mode_replace (guchar *src1,
gboolean *affect); gboolean *affect);
static void rotate_pointers (gpointer *p, static void rotate_pointers (gpointer *p,
guint32 n); guint32 n);
/* MMX stuff */

/* Run-time switch, set in main() from the CPUID feature bits. */
extern gboolean use_mmx;

/* The assembly routines are only built when configure defined
 * HAVE_ASM_MMX, so only reference them in that case; otherwise the
 * macros below expand to nothing and the plain C loops are used. */
#ifdef HAVE_ASM_MMX
#define USE_GCC_INTEL_MMX
#endif

#ifdef USE_GCC_INTEL_MMX

/* Prototype of one assembly routine: the first three arguments travel in
 * registers (regparm(3)), dst is passed on the stack. */
#define MMX_PIXEL_OP(x) \
void \
x( \
const unsigned char *src1, \
const unsigned char *src2, \
unsigned count, \
unsigned char *dst) __attribute((regparm(3)));

/* Declares both variants of an operation: 4-byte and 2-byte pixels. */
#define MMX_PIXEL_OP_3A_1A(op) \
MMX_PIXEL_OP(op##_pixels_3a_3a) \
MMX_PIXEL_OP(op##_pixels_1a_1a)

/* Placed at the top of a *_pixels() function: when MMX is usable and both
 * sources have alpha with the same 2- or 4-byte layout, hand the whole
 * run to the assembly routine and return; otherwise fall through to the
 * C implementation that follows. */
#define USE_MMX_PIXEL_OP_3A_1A(op) \
if (use_mmx && has_alpha1 && has_alpha2) \
{ \
if (bytes1==2 && bytes2==2) \
{ \
op##_pixels_1a_1a(src1, src2, length, dest); \
return; \
} \
if (bytes1==4 && bytes2==4) \
{ \
op##_pixels_3a_3a(src1, src2, length, dest); \
return; \
} \
} \
/*fprintf(stderr, "non-MMX: %s(%d, %d, %d, %d)\n", #op, \
bytes1, bytes2, has_alpha1, has_alpha2);*/
#else
#define MMX_PIXEL_OP_3A_1A(op)
#define USE_MMX_PIXEL_OP_3A_1A(op)
#endif
void void
@ -715,6 +748,7 @@ extract_alpha_pixels (const guchar *src,
} }
} }
MMX_PIXEL_OP_3A_1A(darken)
void void
darken_pixels (const guchar *src1, darken_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -728,6 +762,8 @@ darken_pixels (const guchar *src1,
gint b, alpha; gint b, alpha;
guchar s1, s2; guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(darken)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--) while (length--)
@ -750,7 +786,7 @@ darken_pixels (const guchar *src1,
} }
} }
MMX_PIXEL_OP_3A_1A(lighten)
void void
lighten_pixels (const guchar *src1, lighten_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -764,6 +800,8 @@ lighten_pixels (const guchar *src1,
gint b, alpha; gint b, alpha;
guchar s1, s2; guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(lighten)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--) while (length--)
@ -881,6 +919,7 @@ color_only_pixels (const guchar *src1,
} }
} }
MMX_PIXEL_OP_3A_1A(multiply)
void void
multiply_pixels (const guchar *src1, multiply_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -894,6 +933,8 @@ multiply_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint tmp; gint tmp;
USE_MMX_PIXEL_OP_3A_1A(multiply)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
if (has_alpha1 && has_alpha2) if (has_alpha1 && has_alpha2)
@ -973,6 +1014,8 @@ divide_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(screen)
void void
screen_pixels (const guchar *src1, screen_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -986,6 +1029,8 @@ screen_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint tmp; gint tmp;
USE_MMX_PIXEL_OP_3A_1A(screen)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1005,6 +1050,8 @@ screen_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(overlay)
void void
overlay_pixels (const guchar *src1, overlay_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1153,6 +1200,8 @@ hardlight_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(add)
void void
add_pixels (const guchar *src1, add_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1165,6 +1214,8 @@ add_pixels (const guchar *src1,
{ {
gint alpha, b; gint alpha, b;
USE_MMX_PIXEL_OP_3A_1A(add)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1187,6 +1238,8 @@ add_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(substract)
void void
subtract_pixels (const guchar *src1, subtract_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1200,6 +1253,8 @@ subtract_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint diff; gint diff;
USE_MMX_PIXEL_OP_3A_1A(substract)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)
@ -1222,6 +1277,8 @@ subtract_pixels (const guchar *src1,
} }
MMX_PIXEL_OP_3A_1A(difference)
void void
difference_pixels (const guchar *src1, difference_pixels (const guchar *src1,
const guchar *src2, const guchar *src2,
@ -1235,6 +1292,8 @@ difference_pixels (const guchar *src1,
gint alpha, b; gint alpha, b;
gint diff; gint diff;
USE_MMX_PIXEL_OP_3A_1A(difference)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1; alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --) while (length --)

5
app/paint_funcs_simd.S Normal file
View File

@ -0,0 +1,5 @@
/* Wrapper listed in gimp_SOURCES: pulls in the i386 MMX routines only when
 * configure defined HAVE_ASM_MMX; otherwise this file assembles to nothing. */
#include "config.h"
#ifdef HAVE_ASM_MMX
#include <arch/i386/mmx/paint_funcs_mmx.S>
#endif

View File

@ -160,6 +160,50 @@ CPPFLAGS="$CPPFLAGS $GTK_CFLAGS"
LDFLAGS="$LDFLAGS `echo $GTK_LIBS | sed 's/\(.*\)\(-lgtk.*\)/\1/'`" LDFLAGS="$LDFLAGS `echo $GTK_LIBS | sed 's/\(.*\)\(-lgtk.*\)/\1/'`"
LIBS="$LIBS $GTK_LIBS" LIBS="$LIBS $GTK_LIBS"
dnl Test for MMX stuff
have_asm_mmx=false
AC_MSG_CHECKING([for Intel Pentium architecture (IA32)])
if test "$host_cpu" = "i386" -o "$host_cpu" = "i486"\
-o "$host_cpu" = "i586" -o "$host_cpu" = "i586"\
-o "$host_cpu" = "i686" -o "$host_cpu" = "i786" ;
then
AC_MSG_RESULT(yes)
AC_MSG_CHECKING([for support for gcc-style register parameters on Intel])
AC_TRY_COMPILE([],
[extern void x(
const unsigned char *src1,
const unsigned char *src2,
unsigned count,
unsigned char *dst) __attribute((regparm(3)));],
[AC_MSG_RESULT(yes)
AC_MSG_CHECKING([for support for MMX in assembly code])
cat > conftest.S <<EOF
.text
psubusb %mm3, %mm4
EOF
if $CC -c conftest.S ; then
AC_MSG_RESULT(yes)
rm -f conftest.*
AC_DEFINE(HAVE_ASM_MMX)
have_asm_mmx=true
else
AC_MSG_RESULT(no)
echo "configure: failed program was:" >&AC_FD_CC
cat conftest.S >&AC_FD_CC
rm -rf conftest.* ;
fi
],
[AC_MSG_RESULT(no)
AC_MSG_WARN(*** C compiler does not support __attribute((regparm(3))), MMX code will not be built)]);
else
AC_MSG_RESULT(no) ;
fi
AM_CONDITIONAL(HAVE_ASM_MMX, test x$have_asm_mmx = xtrue)
dnl Test for Xmu dnl Test for Xmu
if test -z "$LIBXMU"; then if test -z "$LIBXMU"; then
AC_CHECK_LIB(Xmu, XmuClientWindow, AC_CHECK_LIB(Xmu, XmuClientWindow,