gimp-composite-mmx-{test,installer}.c
* gimp-composite-mmx-{test,installer}.c
* gimp-composite-sse-{test,installer}.c
* gimp-composite-sse2-{test,installer}.c
* make-installer.py: sort test output by mode name.
* gimp-composite-sse2.[ch]: New compositing mode implementations: darken, difference, grain_extract, lighten.
* gimp-composite-x86.h: added.
* gimp-composite-{sse,mmx,sse2}.[ch]: Tightening declarations of clobbered registers.
This commit is contained in:
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
||||
2003-09-15 Helvetix Victorinox <helvetix@gimp.org>
|
||||
|
||||
* gimp-composite-mmx-{test,installer}.c
|
||||
* gimp-composite-sse-{test,installer}.c
|
||||
* gimp-composite-sse2-{test,installer}.c
|
||||
* make-installer.py: sort test output by mode name
|
||||
|
||||
* gimp-composite-sse2.[ch]: New compositing mode implementations
|
||||
darken
|
||||
difference
|
||||
grain_extract
|
||||
lighten
|
||||
|
||||
* gimp-composite-x86.h: added
|
||||
|
||||
* gimp-composite-{sse,mmx,sse2}.[ch]: Tightening declarations of clobbered registers.
|
||||
|
||||
2003-09-16 Manish Singh <yosh@gimp.org>
|
||||
|
||||
* app/Makefile.am: use -u to prevent garbage collection of symbols
|
||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
@ -57,74 +57,52 @@ gimp_composite_mmx_test (int iterations, int n_pixels)
|
||||
}
|
||||
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_multiply_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("multiply", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("multiply", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_screen_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("screen", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("screen", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx)) {
|
||||
printf("addition failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("addition", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_burn_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("burn", &generic_ctx, &special_ctx)) {
|
||||
printf("burn failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("subtract", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("burn", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_darken_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("darken", &generic_ctx, &special_ctx)) {
|
||||
printf("darken failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("darken", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
printf("difference failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("lighten", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_divide_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("divide", &generic_ctx, &special_ctx)) {
|
||||
printf("divide failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("divide", ft0, ft1);
|
||||
@ -134,24 +112,17 @@ gimp_composite_mmx_test (int iterations, int n_pixels)
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_dodge_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("dodge", &generic_ctx, &special_ctx)) {
|
||||
printf("dodge failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("dodge", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_burn_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("burn", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("burn", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("grain_extract", &generic_ctx, &special_ctx)) {
|
||||
printf("grain_extract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("grain_extract", ft0, ft1);
|
||||
@ -161,27 +132,70 @@ gimp_composite_mmx_test (int iterations, int n_pixels)
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("grain_merge", &generic_ctx, &special_ctx)) {
|
||||
printf("grain_merge failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("grain_merge", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx)) {
|
||||
printf("lighten failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("swap", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("lighten", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_multiply_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("multiply", &generic_ctx, &special_ctx)) {
|
||||
printf("multiply failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("multiply", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCALE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCALE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_scale_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("scale", &generic_ctx, &special_ctx)) {
|
||||
printf("scale failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("scale", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_screen_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("screen", &generic_ctx, &special_ctx)) {
|
||||
printf("screen failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("screen", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
|
||||
printf("subtract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("subtract", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_mmx, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
|
||||
printf("swap failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("swap", ft0, ft1);
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
@ -197,7 +211,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -127,7 +127,6 @@ gimp_composite_regression_compare_contexts (char *operation, GimpCompositeContex
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -57,74 +57,52 @@ gimp_composite_sse_test (int iterations, int n_pixels)
|
||||
}
|
||||
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_multiply_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("multiply", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("multiply", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_screen_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("screen", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("screen", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx)) {
|
||||
printf("addition failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("addition", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_burn_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("burn", &generic_ctx, &special_ctx)) {
|
||||
printf("burn failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("subtract", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("burn", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_darken_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("darken", &generic_ctx, &special_ctx)) {
|
||||
printf("darken failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("darken", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
printf("difference failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("lighten", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_divide_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("divide", &generic_ctx, &special_ctx)) {
|
||||
printf("divide failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("divide", ft0, ft1);
|
||||
@ -134,24 +112,17 @@ gimp_composite_sse_test (int iterations, int n_pixels)
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_dodge_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("dodge", &generic_ctx, &special_ctx)) {
|
||||
printf("dodge failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("dodge", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_BURN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_burn_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("burn", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("burn", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("grain_extract", &generic_ctx, &special_ctx)) {
|
||||
printf("grain_extract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("grain_extract", ft0, ft1);
|
||||
@ -161,27 +132,70 @@ gimp_composite_sse_test (int iterations, int n_pixels)
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("grain_merge", &generic_ctx, &special_ctx)) {
|
||||
printf("grain_merge failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("grain_merge", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx)) {
|
||||
printf("lighten failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("swap", ft0, ft1);
|
||||
gimp_composite_regression_timer_report ("lighten", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_multiply_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("multiply", &generic_ctx, &special_ctx)) {
|
||||
printf("multiply failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("multiply", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCALE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCALE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_scale_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("scale", &generic_ctx, &special_ctx)) {
|
||||
printf("scale failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("scale", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_screen_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("screen", &generic_ctx, &special_ctx)) {
|
||||
printf("screen failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("screen", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
|
||||
printf("subtract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("subtract", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_sse, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
|
||||
printf("swap failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("swap", ft0, ft1);
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
@ -197,7 +211,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,6 +20,10 @@ static struct install_table {
|
||||
{ GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_addition_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_darken_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_dodge_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 },
|
||||
{ GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_swap_rgba8_rgba8_rgba8_sse2 },
|
||||
#endif
|
||||
{ 0, 0, 0, 0, NULL }
|
||||
|
@ -57,29 +57,72 @@ gimp_composite_sse2_test (int iterations, int n_pixels)
|
||||
}
|
||||
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx)) {
|
||||
printf("addition failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("addition", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_darken_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("darken", &generic_ctx, &special_ctx)) {
|
||||
printf("darken failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("darken", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
|
||||
printf("difference failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("difference", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_dodge_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("dodge", &generic_ctx, &special_ctx)) {
|
||||
printf("dodge failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("dodge", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("grain_extract", &generic_ctx, &special_ctx)) {
|
||||
printf("grain_extract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("grain_extract", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx)) {
|
||||
printf("lighten failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("lighten", ft0, ft1);
|
||||
|
||||
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
|
||||
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
|
||||
printf("subtract failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("subtract", ft0, ft1);
|
||||
@ -89,6 +132,7 @@ gimp_composite_sse2_test (int iterations, int n_pixels)
|
||||
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
|
||||
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_sse2, &special_ctx);
|
||||
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
|
||||
printf("swap failed\n");
|
||||
return (1);
|
||||
}
|
||||
gimp_composite_regression_timer_report ("swap", ft0, ft1);
|
||||
@ -107,7 +151,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
@ -1,4 +1,6 @@
|
||||
/* The GIMP -- an image manipulation program
|
||||
/* -*- mode: c tab-width: 2; c-basic-indent: 2; indent-tabs-mode: nil -*-
|
||||
*
|
||||
* The GIMP -- an image manipulation program
|
||||
* Copyright (C) 1995 Spencer Kimball and Peter Mattis
|
||||
*
|
||||
* -*- mode: c tab-width: 2; c-basic-indent: 2; indent-tabs-mode: nil -*-
|
||||
@ -34,12 +36,10 @@
|
||||
|
||||
#include "gimp-composite.h"
|
||||
#include "gimp-composite-sse2.h"
|
||||
#include "gimp-composite-x86.h"
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
|
||||
#define pminub(src,dst,tmp) "pminub " "%%" #src ", %%" #dst
|
||||
#define pmaxub(src,dst,tmp) "pmaxub " "%%" #src ", %%" #dst
|
||||
|
||||
const static guint32 rgba8_alpha_mask_128[4] = { 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000 };
|
||||
const static guint32 rgba8_b1_128[4] = { 0x01010101, 0x01010101, 0x01010101, 0x01010101 };
|
||||
const static guint32 rgba8_b255_128[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
|
||||
@ -90,75 +90,74 @@ void
|
||||
gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
asm volatile ("movdqu %0,%%xmm0"
|
||||
|
||||
asm volatile (" movdqu %0,%%xmm0\n"
|
||||
"\tmovq %1,%%mm0"
|
||||
: /* empty */
|
||||
: "m" (*rgba8_alpha_mask_128)
|
||||
: "%xmm0");
|
||||
|
||||
: "m" (*rgba8_alpha_mask_128), "m" (*rgba8_alpha_mask_64)
|
||||
: "%xmm0", "%mm0");
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm (" movdqu %0, %%xmm2\n"
|
||||
"\tmovdqu %1, %%xmm3\n"
|
||||
"\tmovdqu %%xmm2, %%xmm4\n"
|
||||
"\tpaddusb %%xmm3, %%xmm4\n"
|
||||
|
||||
"\tmovdqu %%xmm0, %%xmm1\n"
|
||||
"\tpandn %%xmm4, %%xmm1\n"
|
||||
"\t" pminub(xmm3, xmm2, xmm4) "\n"
|
||||
"\tpand %%xmm0, %%xmm2\n"
|
||||
"\tpor %%xmm2, %%xmm1\n"
|
||||
"\tmovdqu %%xmm1, %2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
|
||||
asm (" movdqu %1,%%xmm2\n"
|
||||
"\tmovdqu %2,%%xmm3\n"
|
||||
"\tmovdqu %%xmm2,%%xmm4\n"
|
||||
"\tpaddusb %%xmm3,%%xmm4\n"
|
||||
|
||||
"\tmovdqu %%xmm0,%%xmm1\n"
|
||||
"\tpandn %%xmm4,%%xmm1\n"
|
||||
"\tpminub %%xmm3,%%xmm2\n"
|
||||
"\tpand %%xmm0,%%xmm2\n"
|
||||
"\tpor %%xmm2,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
|
||||
asm volatile ("movq %0,%%mm0"
|
||||
: /* empty */
|
||||
: "m" (*rgba8_alpha_mask_64)
|
||||
: "%mm0");
|
||||
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm (" movq %0, %%mm2\n"
|
||||
"\tmovq %1, %%mm3\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tpaddusb %%mm3, %%mm4\n"
|
||||
"\tmovq %%mm0, %%mm1\n"
|
||||
"\tpandn %%mm4, %%mm1\n"
|
||||
"\t" pminub(mm3, mm2, mm4) "\n"
|
||||
"\tpand %%mm0, %%mm2\n"
|
||||
"\tpor %%mm2, %%mm1\n"
|
||||
"\tmovq %%mm1, %2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||
asm (" movq %1,%%mm2\n"
|
||||
"\tmovq %2,%%mm3\n"
|
||||
"\tmovq %%mm2,%%mm4\n"
|
||||
"\tpaddusb %%mm3,%%mm4\n"
|
||||
"\tmovq %%mm0,%%mm1\n"
|
||||
"\tpandn %%mm4,%%mm1\n"
|
||||
"\tpminub %%mm3,%%mm2\n"
|
||||
"\tpand %%mm0,%%mm2\n"
|
||||
"\tpor %%mm2,%%mm1\n"
|
||||
"\tmovq %%mm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
|
||||
for (; op.n_pixels >= 1; op.n_pixels -= 1)
|
||||
{
|
||||
asm volatile (" movd (%0), %%mm2;\n"
|
||||
"\tmovd (%1), %%mm3;\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tpaddusb %%mm3, %%mm4\n"
|
||||
"\tmovq %%mm0, %%mm1\n"
|
||||
"\tpandn %%mm4, %%mm1\n"
|
||||
"\t" pminub(mm3, mm2, mm4) "\n"
|
||||
"\tpand %%mm0, %%mm2\n"
|
||||
"\tpor %%mm2, %%mm1\n"
|
||||
"\tmovd %%mm1, (%2);\n"
|
||||
: /* empty */
|
||||
: "r" (op.A), "r" (op.B), "r" (op.D)
|
||||
: "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||
asm volatile (" movd %1,%%mm2\n"
|
||||
"\tmovd %2,%%mm3\n"
|
||||
"\tmovq %%mm2,%%mm4\n"
|
||||
"\tpaddusb %%mm3,%%mm4\n"
|
||||
"\tmovq %%mm0,%%mm1\n"
|
||||
"\tpandn %%mm4,%%mm1\n"
|
||||
"\tpminub %%mm3,%%mm2\n"
|
||||
"\tpand %%mm0,%%mm2\n"
|
||||
"\tpor %%mm2,%%mm1\n"
|
||||
"\tmovd %%mm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||
op.A += 4;
|
||||
op.B += 4;
|
||||
op.D += 4;
|
||||
}
|
||||
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
@ -166,31 +165,70 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
void
|
||||
xxxgimp_composite_burn_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
xxxgimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %1,%%xmm2\n"
|
||||
"\tmovdqu %2,%%xmm3\n"
|
||||
"\tpminub %%xmm3,%%xmm2\n"
|
||||
"\tmovdqu %%xmm2,%0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %1, %%mm2\n"
|
||||
"\tpminub %2, %%mm2\n"
|
||||
"\tmovq %%mm2, %0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %1, %%mm2\n"
|
||||
"\tmovd %2, %%mm3\n"
|
||||
"\tpminub %%mm3, %%mm2\n"
|
||||
"\tmovd %%mm2, %0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
void
|
||||
gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
|
||||
asm volatile (" movq %0,%%mm0\n"
|
||||
"\tmovdqu %1,%%xmm0"
|
||||
:
|
||||
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
|
||||
: "%mm0");
|
||||
|
||||
: "%mm0", "%xmm0");
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %0,%%xmm2\n"
|
||||
"\tmovdqu %1,%%xmm3\n"
|
||||
asm volatile (" movdqu %1,%%xmm2\n"
|
||||
"\tmovdqu %2,%%xmm3\n"
|
||||
"\tmovdqu %%xmm2,%%xmm4\n"
|
||||
"\tmovdqu %%xmm3,%%xmm5\n"
|
||||
"\tpsubusb %%xmm3,%%xmm4\n"
|
||||
@ -201,10 +239,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
"\tpminub %%xmm3,%%xmm2\n"
|
||||
"\tpand %%xmm0,%%xmm2\n"
|
||||
"\tpor %%xmm2,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
|
||||
"\tmovdqu %%xmm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
@ -212,8 +250,8 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %0, %%mm2\n"
|
||||
"\tmovq %1, %%mm3\n"
|
||||
asm volatile (" movq %1, %%mm2\n"
|
||||
"\tmovq %2, %%mm3\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tmovq %%mm3, %%mm5\n"
|
||||
"\tpsubusb %%mm3, %%mm4\n"
|
||||
@ -224,10 +262,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
"\tpminub %%mm3, %%mm2\n"
|
||||
"\tpand %%mm0, %%mm2\n"
|
||||
"\tpor %%mm2, %%mm1\n"
|
||||
"\tmovq %%mm1, %2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
"\tmovq %%mm1, %0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
@ -235,8 +273,8 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %0, %%mm2\n"
|
||||
"\tmovd %1, %%mm3\n"
|
||||
asm volatile (" movd %1, %%mm2\n"
|
||||
"\tmovd %2, %%mm3\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tmovq %%mm3, %%mm5\n"
|
||||
"\tpsubusb %%mm3, %%mm4\n"
|
||||
@ -247,141 +285,398 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
"\tpminub %%mm3, %%mm2\n"
|
||||
"\tpand %%mm0, %%mm2\n"
|
||||
"\tpor %%mm2, %%mm1\n"
|
||||
"\tmovd %%mm1, %2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
"\tmovd %%mm1, %0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
gimp_composite_dodge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
#if 0
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %0,%%xmm0\n"
|
||||
"\tmovdqu %1,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%%xmm3\n"
|
||||
"\tpxor %%xmm2,%%xmm2\n"
|
||||
"\tpunpcklbw %%xmm2,%%xmm3\n"
|
||||
"\tpunpcklbw %%xmm0,%%xmm2\n"
|
||||
|
||||
"\tmovdqu %3,%%xmm4\n"
|
||||
"\tpsubw %%xmm3,%%xmm4\n"
|
||||
|
||||
"\t" xmm_pdivwuqX(xmm2,xmm4,xmm5,xmm6) "\n"
|
||||
|
||||
"\tmovdqu %%xmm1,%%xmm3\n"
|
||||
"\tpxor %%xmm2,%%xmm2\n"
|
||||
"\tpunpckhbw %%xmm2,%%xmm3\n"
|
||||
"\tpunpckhbw %%xmm0,%%xmm2\n"
|
||||
|
||||
"\tmovdqu %3,%%xmm4\n"
|
||||
"\tpsubw %%xmm3,%%xmm4\n"
|
||||
|
||||
"\t" xmm_pdivwuqX(xmm2,xmm4,xmm6,xmm7) "\n"
|
||||
|
||||
"\tpackuswb %%xmm6,%%xmm5\n"
|
||||
|
||||
"\tmovdqu %4,%%xmm6\n"
|
||||
"\tmovdqu %%xmm1,%%xmm7\n"
|
||||
"\tpminub %%xmm0,%%xmm7\n"
|
||||
"\tpand %%xmm6,%%xmm7\n"
|
||||
"\tpandn %%xmm5,%%xmm6\n"
|
||||
|
||||
"\tpor %%xmm6,%%xmm7\n"
|
||||
|
||||
"\tmovdqu %%xmm7,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256_128), "m" (*rgba8_alpha_mask_128)
|
||||
: "0", "1", "2", "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %0,%%mm0\n"
|
||||
"\tmovq %1,%%mm1\n"
|
||||
"\tmovq %%mm1,%%mm3\n"
|
||||
"\tpxor %%mm2,%%mm2\n"
|
||||
"\tpunpcklbw %%mm2,%%mm3\n"
|
||||
"\tpunpcklbw %%mm0,%%mm2\n"
|
||||
|
||||
"\tmovq %3,%%mm4\n"
|
||||
"\tpsubw %%mm3,%%mm4\n"
|
||||
|
||||
"\t" pdivwuqX(mm2,mm4,mm5) "\n"
|
||||
|
||||
"\tmovq %%mm1,%%mm3\n"
|
||||
"\tpxor %%mm2,%%mm2\n"
|
||||
"\tpunpckhbw %%mm2,%%mm3\n"
|
||||
"\tpunpckhbw %%mm0,%%mm2\n"
|
||||
|
||||
"\tmovq %3,%%mm4\n"
|
||||
"\tpsubw %%mm3,%%mm4\n"
|
||||
|
||||
"\t" pdivwuqX(mm2,mm4,mm6) "\n"
|
||||
|
||||
"\tpackuswb %%mm6,%%mm5\n"
|
||||
|
||||
"\tmovq %4,%%mm6\n"
|
||||
"\tmovq %%mm1,%%mm7\n"
|
||||
"\tpminub %%mm0,%%mm7\n"
|
||||
"\tpand %%mm6,%%mm7\n"
|
||||
"\tpandn %%mm5,%%mm6\n"
|
||||
|
||||
"\tpor %%mm6,%%mm7\n"
|
||||
|
||||
"\tmovq %%mm7,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
|
||||
: "0", "1", "2", "%eax", "%ecx", "%edx", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %0,%%mm0\n"
|
||||
"\tmovq %1,%%mm1\n"
|
||||
"\tmovq %%mm1,%%mm3\n"
|
||||
"\tpxor %%mm2,%%mm2\n"
|
||||
"\tpunpcklbw %%mm2,%%mm3\n"
|
||||
"\tpunpcklbw %%mm0,%%mm2\n"
|
||||
|
||||
"\tmovq %3,%%mm4\n"
|
||||
"\tpsubw %%mm3,%%mm4\n"
|
||||
|
||||
"\t" pdivwuqX(mm2,mm4,mm5) "\n"
|
||||
|
||||
"\tmovq %%mm1,%%mm3\n"
|
||||
"\tpxor %%mm2,%%mm2\n"
|
||||
"\tpunpckhbw %%mm2,%%mm3\n"
|
||||
"\tpunpckhbw %%mm0,%%mm2\n"
|
||||
|
||||
"\tmovq %3,%%mm4\n"
|
||||
"\tpsubw %%mm3,%%mm4\n"
|
||||
|
||||
"\t" pdivwuqX(mm2,mm4,mm6) "\n"
|
||||
|
||||
"\tpackuswb %%mm6,%%mm5\n"
|
||||
|
||||
"\tmovq %4,%%mm6\n"
|
||||
"\tmovq %%mm1,%%mm7\n"
|
||||
"\tpminub %%mm0,%%mm7\n"
|
||||
"\tpand %%mm6,%%mm7\n"
|
||||
"\tpandn %%mm5,%%mm6\n"
|
||||
|
||||
"\tpor %%mm6,%%mm7\n"
|
||||
|
||||
"\tmovd %%mm7,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
|
||||
: "0", "1", "2", "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
void
|
||||
xxxgimp_composite_divide_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
asm volatile (" movq %0,%%mm0\n"
|
||||
"\tpxor %%mm6,%%mm6\n"
|
||||
"\tmovq %1,%%mm7\n"
|
||||
"\tmovdqu %2,%%xmm0\n"
|
||||
"\tpxor %%xmm6,%%xmm6\n"
|
||||
"\tmovdqu %3,%%xmm7\n"
|
||||
: /* empty */
|
||||
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64), "m" (*rgba8_alpha_mask_128), "m" (*rgba8_w128_128)
|
||||
: "%mm0", "%mm6", "%mm7", "%xmm0", "%xmm6", "%xmm7");
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %1,%%xmm2\n"
|
||||
"\tmovdqu %2,%%xmm3\n"
|
||||
xmm_low_bytes_to_words(xmm2,xmm4,xmm6)
|
||||
xmm_low_bytes_to_words(xmm3,xmm5,xmm6)
|
||||
"\tpsubw %%xmm5,%%xmm4\n"
|
||||
"\tpaddw %%xmm7,%%xmm4\n"
|
||||
"\tmovdqu %%xmm4,%%xmm1\n"
|
||||
|
||||
xmm_high_bytes_to_words(xmm2,xmm4,xmm6)
|
||||
xmm_high_bytes_to_words(xmm3,xmm5,xmm6)
|
||||
|
||||
"\tpsubw %%xmm5,%%xmm4\n"
|
||||
"\tpaddw %%xmm7,%%xmm4\n"
|
||||
|
||||
"\tpackuswb %%xmm4,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%%xmm4\n"
|
||||
|
||||
"\tmovdqu %%xmm0,%%xmm1\n"
|
||||
"\tpandn %%xmm4,%%xmm1\n"
|
||||
|
||||
"\tpminub %%xmm3,%%xmm2\n"
|
||||
"\tpand %%xmm0,%%xmm2\n"
|
||||
|
||||
"\tpor %%xmm2,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %1,%%mm2\n"
|
||||
"\tmovq %2,%%mm3\n"
|
||||
mmx_low_bytes_to_words(mm2,mm4,mm6)
|
||||
mmx_low_bytes_to_words(mm3,mm5,mm6)
|
||||
"\tpsubw %%mm5,%%mm4\n"
|
||||
"\tpaddw %%mm7,%%mm4\n"
|
||||
"\tmovq %%mm4,%%mm1\n"
|
||||
|
||||
mmx_high_bytes_to_words(mm2,mm4,mm6)
|
||||
mmx_high_bytes_to_words(mm3,mm5,mm6)
|
||||
|
||||
"\tpsubw %%mm5,%%mm4\n"
|
||||
"\tpaddw %%mm7,%%mm4\n"
|
||||
|
||||
"\tpackuswb %%mm4,%%mm1\n"
|
||||
"\tmovq %%mm1,%%mm4\n"
|
||||
|
||||
"\tmovq %%mm0,%%mm1\n"
|
||||
"\tpandn %%mm4,%%mm1\n"
|
||||
|
||||
"\tpminub %%mm3,%%mm2\n"
|
||||
"\tpand %%mm0,%%mm2\n"
|
||||
|
||||
"\tpor %%mm2,%%mm1\n"
|
||||
"\tmovq %%mm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %1, %%mm2\n"
|
||||
"\tmovd %2, %%mm3\n"
|
||||
mmx_low_bytes_to_words(mm2,mm4,mm6)
|
||||
mmx_low_bytes_to_words(mm3,mm5,mm6)
|
||||
"\tpsubw %%mm5, %%mm4\n"
|
||||
"\tpaddw %%mm7, %%mm4\n"
|
||||
"\tmovq %%mm4, %%mm1\n"
|
||||
"\tpackuswb %%mm6, %%mm1\n"
|
||||
"\tmovq %%mm1, %%mm4\n"
|
||||
"\tmovq %%mm0, %%mm1\n"
|
||||
"\tpandn %%mm4, %%mm1\n"
|
||||
"\tpminub %%mm3, %%mm2\n"
|
||||
"\tpand %%mm0, %%mm2\n"
|
||||
"\tpor %%mm2, %%mm1\n"
|
||||
"\tmovd %%mm1, %0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
void
|
||||
xxxgimp_composite_dodge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %1, %%xmm2\n"
|
||||
"\tmovdqu %2, %%xmm3\n"
|
||||
"\tmovdqu %%xmm2, %%xmm4\n"
|
||||
"\tpmaxub %%xmm3, %%xmm4\n"
|
||||
"\tmovdqu %%xmm0, %%xmm1\n"
|
||||
"\tpandn %%xmm4, %%xmm1\n"
|
||||
"\tpminub %%xmm2, %%xmm3\n"
|
||||
"\tpand %%xmm0, %%xmm3\n"
|
||||
"\tpor %%xmm3, %%xmm1\n"
|
||||
"\tmovdqu %%xmm1, %0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %1, %%mm2\n"
|
||||
"\tmovq %2, %%mm3\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tpmaxub %%mm3, %%mm4\n"
|
||||
"\tmovq %%mm0, %%mm1\n"
|
||||
"\tpandn %%mm4, %%mm1\n"
|
||||
"\tpminub %%mm2, %%mm3\n"
|
||||
"\tpand %%mm0, %%mm3\n"
|
||||
"\tpor %%mm3, %%mm1\n"
|
||||
"\tmovq %%mm1, %0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %1, %%mm2\n"
|
||||
"\tmovd %2, %%mm3\n"
|
||||
"\tmovq %%mm2, %%mm4\n"
|
||||
"\tpmaxub %%mm3, %%mm4\n"
|
||||
"\tmovq %%mm0, %%mm1\n"
|
||||
"\tpandn %%mm4, %%mm1\n"
|
||||
"\tpminub %%mm2,%%mm3\n"
|
||||
"\tpand %%mm0, %%mm3\n"
|
||||
"\tpor %%mm3, %%mm1\n"
|
||||
"\tmovd %%mm1, %0\n"
|
||||
: "=m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
/*
 * The functions from here down to the screen stub are empty placeholders:
 * each takes the compositing context and returns without touching it.
 * NOTE(review): the "xxx" prefix presumably keeps them from being picked
 * up under the real mode names -- confirm against the installer generator.
 */
/* Placeholder: grain extract (empty body). */
void
|
||||
xxxgimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/* Placeholder: grain merge (empty body). */
void
|
||||
xxxgimp_composite_grain_merge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/* Placeholder: lighten (empty body). */
void
|
||||
xxxgimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
}
|
||||
|
||||
/* Placeholder: multiply (empty body). */
void
|
||||
xxxgimp_composite_multiply_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
}
|
||||
|
||||
/* Placeholder: overlay (empty body). */
void
|
||||
xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/* Placeholder: scale (empty body). */
void
|
||||
xxxgimp_composite_scale_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
}
|
||||
|
||||
/* Placeholder: screen (empty body). */
void
|
||||
xxxgimp_composite_screen_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
|
||||
asm volatile (" movq %0,%%mm0\n"
|
||||
"\tmovdqu %1,%%xmm0\n"
|
||||
: /* empty */
|
||||
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
|
||||
: "%mm0", "%xmm0");
|
||||
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %0,%%xmm2\n"
|
||||
"\tmovdqu %1,%%xmm3\n"
|
||||
asm volatile (" movdqu %1,%%xmm2\n"
|
||||
"\tmovdqu %2,%%xmm3\n"
|
||||
"\tmovdqu %%xmm2,%%xmm4\n"
|
||||
"\tpsubusb %%xmm3,%%xmm4\n"
|
||||
|
||||
|
||||
"\tmovdqu %%xmm0,%%xmm1\n"
|
||||
"\tpandn %%xmm4,%%xmm1\n"
|
||||
"\t" pminub(xmm3,xmm2,xmm4) "\n"
|
||||
"\tpminub %%xmm3,%%xmm2\n"
|
||||
"\tpand %%xmm0,%%xmm2\n"
|
||||
"\tpor %%xmm2,%%xmm1\n"
|
||||
"\tmovdqu %%xmm1,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
|
||||
"\tmovdqu %%xmm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
op.D += 16;
|
||||
}
|
||||
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %0,%%mm2\n"
|
||||
"\tmovq %1,%%mm3\n"
|
||||
|
||||
asm volatile (" movq %1,%%mm2\n"
|
||||
"\tmovq %2,%%mm3\n"
|
||||
"\tmovq %%mm2,%%mm4\n"
|
||||
"\tpsubusb %%mm3,%%mm4\n"
|
||||
|
||||
"\tmovq %%mm0,%%mm1\n"
|
||||
"\tpandn %%mm4,%%mm1\n"
|
||||
|
||||
"\t" pminub(mm3,mm2,mm4) "\n"
|
||||
|
||||
"\tpminub %%mm3,%%mm2\n"
|
||||
"\tpand %%mm0,%%mm2\n"
|
||||
"\tpor %%mm2,%%mm1\n"
|
||||
"\tmovq %%mm1,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
"\tmovq %%mm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
op.D += 8;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %0,%%mm2\n"
|
||||
"\tmovd %1,%%mm3\n"
|
||||
|
||||
asm volatile (" movd %1,%%mm2\n"
|
||||
"\tmovd %2,%%mm3\n"
|
||||
"\tmovq %%mm2,%%mm4\n"
|
||||
"\tpsubusb %%mm3,%%mm4\n"
|
||||
|
||||
"\tmovq %%mm0,%%mm1\n"
|
||||
"\tpandn %%mm4,%%mm1\n"
|
||||
|
||||
"\t" pminub(mm3,mm2,mm4) "\n"
|
||||
|
||||
"\tpminub %%mm3,%%mm2\n"
|
||||
"\tpand %%mm0,%%mm2\n"
|
||||
"\tpor %%mm2,%%mm1\n"
|
||||
"\tmovd %%mm1,%2\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B), "m" (*op.D)
|
||||
: "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
|
||||
}
|
||||
|
||||
"\tmovd %%mm1,%0\n"
|
||||
: "+m" (*op.D)
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
||||
@ -389,7 +684,7 @@ void
|
||||
gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
{
|
||||
GimpCompositeContext op = *_op;
|
||||
|
||||
|
||||
/*
|
||||
* Inhale one whole i686 cache line at once. 64 bytes, 16 rgba8 pixels, 4 128 bit xmm registers.
|
||||
*/
|
||||
@ -403,7 +698,7 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
"\tmovdqu %5,%%xmm5\n"
|
||||
"\tmovdqu %6,%%xmm6\n"
|
||||
"\tmovdqu %7,%%xmm7\n"
|
||||
|
||||
|
||||
"\tmovdqu %%xmm0,%1\n"
|
||||
"\tmovdqu %%xmm1,%0\n"
|
||||
"\tmovdqu %%xmm2,%3\n"
|
||||
@ -412,52 +707,53 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
|
||||
"\tmovdqu %%xmm5,%4\n"
|
||||
"\tmovdqu %%xmm6,%7\n"
|
||||
"\tmovdqu %%xmm7,%6\n"
|
||||
: "+m" (op.A[0]), "+m" (op.B[0]),
|
||||
"+m" (op.A[1]), "+m" (op.B[1]),
|
||||
"+m" (op.A[2]), "+m" (op.B[2]),
|
||||
"+m" (op.A[3]), "+m" (op.B[3])
|
||||
: /* empty */
|
||||
: "m" (op.A[0]), "m" (op.B[0]),
|
||||
"m" (op.A[1]), "m" (op.B[1]),
|
||||
"m" (op.A[2]), "m" (op.B[2]),
|
||||
"m" (op.A[3]), "m" (op.B[3])
|
||||
);
|
||||
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
|
||||
);
|
||||
op.A += 64;
|
||||
op.B += 64;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (; op.n_pixels >= 4; op.n_pixels -= 4)
|
||||
{
|
||||
asm volatile (" movdqu %0,%%xmm2\n"
|
||||
"\tmovdqu %1,%%xmm3\n"
|
||||
"\tmovdqu %%xmm3,%0\n"
|
||||
"\tmovdqu %%xmm2,%1\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "0", "1", "%xmm1", "%xmm2", "%xmm3", "%xmm4");
|
||||
asm volatile (" movdqu %0,%%xmm2\n"
|
||||
"\tmovdqu %1,%%xmm3\n"
|
||||
"\tmovdqu %%xmm3,%0\n"
|
||||
"\tmovdqu %%xmm2,%1\n"
|
||||
: "+m" (*op.A), "+m" (*op.B)
|
||||
: /* empty */
|
||||
: "%xmm2", "%xmm3");
|
||||
op.A += 16;
|
||||
op.B += 16;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (; op.n_pixels >= 2; op.n_pixels -= 2)
|
||||
{
|
||||
asm volatile (" movq %0,%%mm2\n"
|
||||
"\tmovq %1,%%mm3\n"
|
||||
"\tmovq %%mm3,%0\n"
|
||||
"\tmovq %%mm2,%1\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
asm volatile (" movq %0,%%mm2\n"
|
||||
"\tmovq %1,%%mm3\n"
|
||||
"\tmovq %%mm3,%0\n"
|
||||
"\tmovq %%mm2,%1\n"
|
||||
: "+m" (*op.A), "+m" (*op.B)
|
||||
: /* empty */
|
||||
: "%mm2", "%mm3");
|
||||
op.A += 8;
|
||||
op.B += 8;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (op.n_pixels)
|
||||
{
|
||||
asm volatile (" movd %0,%%mm2\n"
|
||||
"\tmovd %1,%%mm3\n"
|
||||
"\tmovd %%mm3,%0\n"
|
||||
"\tmovd %%mm2,%1\n"
|
||||
: /* empty */
|
||||
: "m" (*op.A), "m" (*op.B)
|
||||
: "0", "1", "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
asm volatile (" movd %0,%%mm2\n"
|
||||
"\tmovd %1,%%mm3\n"
|
||||
"\tmovd %%mm3,%0\n"
|
||||
"\tmovd %%mm2,%1\n"
|
||||
: "+m" (*op.A), "+m" (*op.B)
|
||||
: /* empty */
|
||||
: "%mm1", "%mm2", "%mm3", "%mm4");
|
||||
}
|
||||
|
||||
asm("emms");
|
||||
}
|
||||
|
@ -10,7 +10,11 @@ extern void gimp_composite_sse2_init (void);
|
||||
extern void gimp_composite_sse2_install (void);
|
||||
|
||||
extern void gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
extern void gimp_composite_dodge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
|
||||
#endif
|
||||
|
@ -71,7 +71,7 @@ main (int argc, char *argv[])
|
||||
putenv ("GIMP_COMPOSITE=0x1");
|
||||
|
||||
iterations = 1;
|
||||
n_pixels = 1048577;
|
||||
n_pixels = 163921;
|
||||
|
||||
argv++, argc--;
|
||||
while (argc >= 2) {
|
||||
|
208
app/composite/gimp-composite-x86.h
Normal file
208
app/composite/gimp-composite-x86.h
Normal file
@ -0,0 +1,208 @@
|
||||
#if __GNUC__ >= 3
|
||||
|
||||
/*
 * Byte -> word widening.
 *
 * Each macro expands to an asm string fragment that copies register `src`
 * into `dst` and then interleaves the low (punpcklbw) or high (punpckhbw)
 * half of `dst` with register `zero`.  With `zero` holding all-zero bits
 * the bytes end up zero-extended to 16-bit words.  `dst` is overwritten;
 * `src` and `zero` are only read.
 *
 * x86_bytes_to_wordsX() is the shared template: `mov` and `unpack` are the
 * mnemonics, given as string literals.
 */
#define x86_bytes_to_wordsX(mov,unpack,src,dst,zero) \
        "\t" mov " %%" #src ", %%" #dst "; " \
        "\t" unpack " %%" #zero ", %%" #dst "\n"

#define mmx_low_bytes_to_words(src,dst,zero) \
        x86_bytes_to_wordsX("movq", "punpcklbw", src, dst, zero)

#define mmx_high_bytes_to_words(src,dst,zero) \
        x86_bytes_to_wordsX("movq", "punpckhbw", src, dst, zero)

#define xmm_low_bytes_to_words(src,dst,zero) \
        x86_bytes_to_wordsX("movdqu", "punpcklbw", src, dst, zero)

#define xmm_high_bytes_to_words(src,dst,zero) \
        x86_bytes_to_wordsX("movdqu", "punpckhbw", src, dst, zero)
|
||||
|
||||
/*
 * b = INT_MULT(a,b), word-wise.
 *
 * Emitted sequence:
 *   a = a * b + w128
 *   b = ((a >> 8) + a) >> 8
 *
 * The result is left in `b`; `a` is overwritten with the intermediate
 * product-plus-bias.  `w128` is only read.
 *
 * x86_int_multX() is the shared template; `mov` is the register-copy
 * mnemonic (string literal) matching the register width.
 */
#define x86_int_multX(mov,a,b,w128) \
        "\tpmullw %%" #b ", %%" #a "; " \
        "\tpaddw %%" #w128 ", %%" #a "; " \
        "\t" mov " %%" #a ", %%" #b "; " \
        "\tpsrlw $8, %%" #b "; " \
        "\tpaddw %%" #a ", %%" #b "; " \
        "\tpsrlw $8, %%" #b "\n"

#define mmx_int_mult(a,b,w128)   x86_int_multX("movq", a, b, w128)

#define sse2_int_mult(a,b,w128)  x86_int_multX("movdqu", a, b, w128)
|
||||
|
||||
/*
 * Double-word divide: divides the two 16-bit words in the low 32 bits of
 * `dividend` by the matching words of `divisor` (unsigned divw) and stores
 * both quotients in the low 32 bits of `quotient`.
 *
 * Adjusted for subsequent unsigned packing: bit 15 of each quotient word
 * is cleared.
 *
 * Clobbers eax, ecx and edx.  A zero divisor word faults in divw.
 */
#define pdivwX(dividend,divisor,quotient) \
        "movd %%" #dividend ",%%eax; movd %%" #divisor ",%%ecx; " \
        /* low word */ \
        "xorl %%edx,%%edx; divw %%cx; " \
        /* swap halves so the high word sits in ax / cx */ \
        "roll $16, %%eax; roll $16, %%ecx; " \
        "xorl %%edx,%%edx; divw %%cx; " \
        /* clear bit 15 of each quotient and restore word order */ \
        "btr $15, %%eax; roll $16, %%eax; btr $15, %%eax; " \
        "movd %%eax,%%" #quotient ";"
|
||||
|
||||
|
||||
|
||||
/*
 * Quadword divide: divides the four 16-bit words of `dividend` by the
 * matching words of `divisor` and leaves the four quotients in `quotient`.
 *
 * No adjustment for subsequent unsigned packing (the high-order bit of
 * each quotient word is left alone).
 *
 * Uses eax, ecx and edx.  Both `dividend` and `divisor` are destroyed:
 * they are shifted right by 32 bits, and `divisor` is reused to stage the
 * upper two quotients.
 *
 * pdivwq_pairX() divides the two words in the low 32 bits and leaves the
 * two quotients, in their original word order, in eax.
 */
#define pdivwq_pairX(dividend,divisor) \
        "movd %%" #dividend ",%%eax; " \
        "movd %%" #divisor ",%%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "roll $16, %%eax; " \
        "roll $16, %%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "roll $16, %%eax; "

#define pdivwqX(dividend,divisor,quotient) \
        /* words 0-1 -> low half of quotient */ \
        pdivwq_pairX(dividend,divisor) \
        "movd %%eax,%%" #quotient "; " \
        /* bring words 2-3 down */ \
        "psrlq $32,%%" #dividend ";" \
        "psrlq $32,%%" #divisor ";" \
        pdivwq_pairX(dividend,divisor) \
        /* words 2-3 -> high half of quotient, staged through divisor */ \
        "movd %%eax,%%" #divisor ";" \
        "psllq $32,%%" #divisor ";" \
        "por %%" #divisor ",%%" #quotient ";"
|
||||
|
||||
/*
 * Quadword divide: divides the four 16-bit words of `dividend` by the
 * matching words of `divisor` and leaves the four quotients in `quotient`.
 *
 * Adjusted for subsequent unsigned packing: bit 15 of each quotient word
 * is cleared.
 *
 * Uses eax, ecx and edx.  Both `dividend` and `divisor` are destroyed:
 * they are shifted right by 32 bits, and `divisor` is reused to stage the
 * upper two quotients.  A zero divisor word faults in divw.
 *
 * The upper-half division must NOT write `quotient` directly: movd from a
 * general register replaces the whole 64-bit register and would discard
 * the lower two quotients.  It is therefore staged through `divisor` and
 * merged with por.
 *
 * pdivwu_eaxX() divides the two words in the low 32 bits and leaves the
 * two adjusted quotients, in their original word order, in eax.
 */
#define pdivwu_eaxX(dividend,divisor) \
        "movd %%" #dividend ",%%eax; " \
        "movd %%" #divisor ",%%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "roll $16, %%eax; " \
        "roll $16, %%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "btr $15, %%eax; " \
        "roll $16, %%eax; " \
        "btr $15, %%eax; "

#define pdivwuqX(dividend,divisor,quotient) \
        pdivwu_eaxX(dividend,divisor) \
        "movd %%eax,%%" #quotient ";" \
        "psrlq $32,%%" #dividend ";" \
        "psrlq $32,%%" #divisor ";" \
        pdivwu_eaxX(dividend,divisor) \
        "movd %%eax,%%" #divisor ";" \
        "psllq $32,%%" #divisor ";" \
        "por %%" #divisor ",%%" #quotient ";"
|
||||
|
||||
/*
 * 128-bit divide: divides the eight 16-bit words of XMM register
 * `dividend` by the matching words of `divisor` and leaves the eight
 * quotients in `quotient`.  No adjustment for unsigned packing (the
 * high-order bit of each quotient word is left alone).
 *
 * Uses eax, ecx and edx; destroys `dividend`, `divisor` and `scratch`.
 * A zero divisor word faults in divw.
 *
 * The operands are walked one 32-bit group at a time with the whole-
 * register byte shifts psrldq/pslldq.  (psrlq/psllq shift each 64-bit
 * half independently, so they can neither bring the upper groups down nor
 * place a result above bit 63.)  Each partial result is merged into
 * `quotient` before `scratch` is reused.
 *
 * xmm_pdivwq_eaxX() divides the two words in the low 32 bits and leaves
 * the two quotients, in their original word order, in eax.
 */
#define xmm_pdivwq_eaxX(dividend,divisor) \
        "movd %%" #dividend ",%%eax; " \
        "movd %%" #divisor ",%%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "roll $16, %%eax; " \
        "roll $16, %%ecx; " \
        "xorl %%edx,%%edx; " \
        "divw %%cx; " \
        "roll $16, %%eax; "

#define xmm_pdivwqX(dividend,divisor,quotient,scratch) \
        /* group 0: movd zero-extends, so quotient starts out clean */ \
        xmm_pdivwq_eaxX(dividend,divisor) \
        "movd %%eax,%%" #quotient "; " \
        /* group 1 */ \
        "psrldq $4,%%" #divisor ";" \
        "psrldq $4,%%" #dividend ";" \
        xmm_pdivwq_eaxX(dividend,divisor) \
        "movd %%eax,%%" #scratch ";" \
        "pslldq $4,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient ";" \
        /* group 2 */ \
        "psrldq $4,%%" #divisor ";" \
        "psrldq $4,%%" #dividend ";" \
        xmm_pdivwq_eaxX(dividend,divisor) \
        "movd %%eax,%%" #scratch ";" \
        "pslldq $8,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient ";" \
        /* group 3 */ \
        "psrldq $4,%%" #divisor ";" \
        "psrldq $4,%%" #dividend ";" \
        xmm_pdivwq_eaxX(dividend,divisor) \
        "movd %%eax,%%" #scratch ";" \
        "pslldq $12,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient ";"
|
||||
|
||||
/*
 * Double-word divide on XMM registers: divides the two 16-bit words in
 * the low 32 bits of `dividend` by the matching words of `divisor` and
 * loads both quotients into `quotient` (movd from eax, so the rest of the
 * register is zeroed).  Bit 15 of each quotient word is cleared for
 * subsequent unsigned packing.
 *
 * Clobbers eax, ecx and edx.  A zero divisor word faults in divw.
 */
#define xmm_pdivwX(dividend,divisor,quotient) "movd %%" #dividend ",%%eax; " \
                                              "movd %%" #divisor ",%%ecx; " \
                                              "xorl %%edx,%%edx; " \
                                              "divw %%cx; " \
                                              "roll $16, %%eax; " \
                                              "roll $16, %%ecx; " \
                                              "xorl %%edx,%%edx; " \
                                              "divw %%cx; " \
                                              "btr $15, %%eax; " \
                                              "roll $16, %%eax; " \
                                              "btr $15, %%eax; " \
                                              "movd %%eax,%%" #quotient ";"

/*
 * 128-bit divide, adjusted for unsigned packing: applies xmm_pdivwX() to
 * each of the four 32-bit groups of `dividend` / `divisor` and assembles
 * the eight quotients in `quotient`.
 *
 * Uses eax, ecx and edx; destroys `dividend`, `divisor` and `scratch`.
 *
 * The groups are walked with the whole-register byte shifts psrldq and
 * pslldq.  (psrlq/psllq shift each 64-bit half independently: they cannot
 * bring groups 2 and 3 down, and a count of 64 or more clears the
 * register.)  Group 0 is divided straight into `quotient`, which also
 * zeroes its upper bits before the later por merges.
 *
 * The expansion has no trailing ';' -- callers append "\n".
 */
#define xmm_pdivwuqX(dividend,divisor,quotient,scratch) \
        xmm_pdivwX(dividend,divisor,quotient) \
        "psrldq $4,%%" #dividend ";" \
        "psrldq $4,%%" #divisor ";" \
        xmm_pdivwX(dividend,divisor,scratch) \
        "pslldq $4,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient ";" \
        "psrldq $4,%%" #dividend ";" \
        "psrldq $4,%%" #divisor ";" \
        xmm_pdivwX(dividend,divisor,scratch) \
        "pslldq $8,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient ";" \
        "psrldq $4,%%" #dividend ";" \
        "psrldq $4,%%" #divisor ";" \
        xmm_pdivwX(dividend,divisor,scratch) \
        "pslldq $12,%%" #scratch ";" \
        "por %%" #scratch ",%%" #quotient
|
||||
|
||||
/*
 * Equivalent to the INT_MULT() macro in gimp-composite-generic.c:
 *
 *   opr2 = INT_MULT(opr1, opr2, t)
 *
 * Works on every 16-bit word of the registers at once.  Emitted steps:
 *
 *   opr1 = opr1 * opr2 + w128
 *   opr2 = opr1
 *   opr2 = ((opr2 >> 8) + opr1) >> 8
 *
 * The result is left in opr2; opr1 is overwritten with the biased
 * product; w128 is only read.
 *
 * pmulw_templateX() is the shared body; `mov` is the register-copy
 * mnemonic (string literal): movq for MMX, movdqu for XMM.
 */
#define pmulw_templateX(mov,opr1,opr2,w128) \
        "\tpmullw %%" #opr2 ", %%" #opr1 "; " \
        "\tpaddw %%" #w128 ", %%" #opr1 "; " \
        "\t" mov " %%" #opr1 ", %%" #opr2 "; " \
        "\tpsrlw $8, %%" #opr2 "; " \
        "\tpaddw %%" #opr1 ", %%" #opr2 "; " \
        "\tpsrlw $8, %%" #opr2 "\n"

#define pmulwX(opr1,opr2,w128)      pmulw_templateX("movq", opr1, opr2, w128)

#define xmm_pmulwX(opr1,opr2,w128)  pmulw_templateX("movdqu", opr1, opr2, w128)
|
||||
|
||||
#endif
|
@ -317,6 +317,8 @@ def gimp_composite_regression(fpout, function_tables, options):
|
||||
#pp.pprint(function_tables)
|
||||
|
||||
generic_table = function_tables
|
||||
|
||||
composite_modes.sort();
|
||||
|
||||
for mode in composite_modes:
|
||||
for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format):
|
||||
@ -368,6 +370,8 @@ def gimp_composite_regression(fpout, function_tables, options):
|
||||
print >>fpout, ' ft0 = gimp_composite_regression_time_function (iterations, %s, &generic_ctx);' % ("gimp_composite_dispatch")
|
||||
print >>fpout, ' ft1 = gimp_composite_regression_time_function (iterations, %s, &special_ctx);' % (generic_table[key][0])
|
||||
print >>fpout, ' if (gimp_composite_regression_compare_contexts ("%s", &generic_ctx, &special_ctx)) {' % (mode_name(mode))
|
||||
|
||||
print >>fpout, ' printf("%s failed\\n");' % (mode_name(mode))
|
||||
print >>fpout, ' return (1);'
|
||||
print >>fpout, ' }'
|
||||
print >>fpout, ' gimp_composite_regression_timer_report ("%s", ft0, ft1);' % (mode_name(mode))
|
||||
@ -518,7 +522,7 @@ op.add_option('-t', '--test', action='store_true', dest='test',
|
||||
help='generate regression testing code')
|
||||
op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=1,
|
||||
help='number of iterations in regression tests')
|
||||
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=1024*1024+1,
|
||||
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=64*2049+16*2049+1,
|
||||
help='number of pixels in each regression test iteration')
|
||||
op.add_option('-r', '--requires', action='append', type='string', dest='requires', default=[],
|
||||
help='cpp #if conditionals')
|
||||
|
Reference in New Issue
Block a user