Add proposed patches to improve handling of flaky tests
With these changes, the flaky/known-failing tests are no longer registered as installed-tests at all (their .test files are not generated), so remove the corresponding flaky test entries from the autopkgtest metadata.
This commit is contained in:
3
debian/patches/series
vendored
3
debian/patches/series
vendored
@ -1,5 +1,8 @@
|
||||
dist/Reinstate-gdk-wayland-cursor-meson.build.patch
|
||||
Flush-drawable-surface-when-getting-a-pixbuf.patch
|
||||
testsuite-Avoid-using-should_fail.patch
|
||||
testsuite-Try-enabling-a11y-tests-other-than-those-known-.patch
|
||||
testsuite-Don-t-create-.test-files-for-flaky-or-failing-t.patch
|
||||
016_no_offscreen_widgets_grabbing.patch
|
||||
017_no_offscreen_device_grabbing.patch
|
||||
060_ignore-random-icons.patch
|
||||
|
||||
163
debian/patches/testsuite-Avoid-using-should_fail.patch
vendored
Normal file
163
debian/patches/testsuite-Avoid-using-should_fail.patch
vendored
Normal file
@ -0,0 +1,163 @@
|
||||
From: Simon McVittie <smcv@debian.org>
|
||||
Date: Wed, 23 Nov 2022 18:36:08 +0000
|
||||
Subject: testsuite: Avoid using should_fail
|
||||
|
||||
There are two possible interpretations of "expected failure": either
|
||||
the test *must* fail (exactly the inverse of an ordinary test, with
|
||||
success becoming failure and failure becoming success), or the test
|
||||
*may* fail (with success intended, but failure possible in some
|
||||
environments). Autotools had the second interpretation, which seems
|
||||
more useful in practice, but Meson has the first.
|
||||
|
||||
In GTK 3.24.35, if the environment is such that the label-sizing.ui
|
||||
reftest happens to be successful, the overall result of the test suite
|
||||
is failure. This seems unlikely to have been the intention.
|
||||
|
||||
Instead of using should_fail, put the tests in one of two new suites:
|
||||
"flaky" is intended for tests that succeed or fail unpredictably
|
||||
according to the test environment or chance, while "failing" is for
|
||||
tests that ought to succeed but currently never do as a result of a
|
||||
bug or missing functionality. With a sufficiently new version of Meson,
|
||||
the flaky and failing tests are not run by default, but can be requested
|
||||
with a command like:
|
||||
|
||||
meson test --suite=flaky --suite=failing
|
||||
|
||||
This arrangement is inspired by GNOME/glib!2987, which was contributed
|
||||
by Marco Trevisan.
|
||||
|
||||
Signed-off-by: Simon McVittie <smcv@debian.org>
|
||||
---
|
||||
testsuite/a11y/meson.build | 4 ++--
|
||||
testsuite/gtk/meson.build | 17 +++++++++++++----
|
||||
testsuite/meson.build | 7 +++++++
|
||||
testsuite/reftests/meson.build | 17 ++++++++++++++---
|
||||
4 files changed, 36 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/testsuite/a11y/meson.build b/testsuite/a11y/meson.build
|
||||
index fa8b045..85d0f5d 100644
|
||||
--- a/testsuite/a11y/meson.build
|
||||
+++ b/testsuite/a11y/meson.build
|
||||
@@ -66,7 +66,7 @@ foreach t: a11y_state_tests
|
||||
'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
|
||||
'GTK_TEST_MESON=1',
|
||||
],
|
||||
- suite: 'a11y',
|
||||
+ suite: ['a11y', 'flaky'],
|
||||
)
|
||||
endif
|
||||
endforeach
|
||||
@@ -100,7 +100,7 @@ foreach t: a11y_tests
|
||||
'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
|
||||
'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
|
||||
],
|
||||
- suite: 'a11y')
|
||||
+ suite: ['a11y', 'flaky'])
|
||||
endforeach
|
||||
|
||||
installed_test_data = [
|
||||
diff --git a/testsuite/gtk/meson.build b/testsuite/gtk/meson.build
|
||||
index 206af0d..137f93a 100644
|
||||
--- a/testsuite/gtk/meson.build
|
||||
+++ b/testsuite/gtk/meson.build
|
||||
@@ -50,7 +50,9 @@ tests = [
|
||||
['revealer-size'],
|
||||
]
|
||||
|
||||
-# Tests that are expected to fail
|
||||
+# Tests that are expected to fail, sometimes or always
|
||||
+flaky = [
|
||||
+]
|
||||
xfail = [
|
||||
]
|
||||
|
||||
@@ -83,7 +85,15 @@ foreach t : tests
|
||||
install: get_option('installed_tests'),
|
||||
install_dir: installed_test_bindir)
|
||||
|
||||
- expect_fail = xfail.contains(test_name)
|
||||
+ suites = ['gtk']
|
||||
+
|
||||
+ if flaky.contains(test_name)
|
||||
+ suites += 'flaky'
|
||||
+ endif
|
||||
+
|
||||
+ if xfail.contains(test_name)
|
||||
+ suites += 'failing'
|
||||
+ endif
|
||||
|
||||
test(test_name, test_exe,
|
||||
args: [ '--tap', '-k' ],
|
||||
@@ -97,8 +107,7 @@ foreach t : tests
|
||||
'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
|
||||
'GTK_TEST_MESON=1',
|
||||
],
|
||||
- suite: 'gtk',
|
||||
- should_fail: expect_fail,
|
||||
+ suite: suites,
|
||||
)
|
||||
endforeach
|
||||
|
||||
diff --git a/testsuite/meson.build b/testsuite/meson.build
|
||||
index 289f272..032baf2 100644
|
||||
--- a/testsuite/meson.build
|
||||
+++ b/testsuite/meson.build
|
||||
@@ -2,6 +2,13 @@ gtk_libexecdir = join_paths(gtk_prefix, get_option('libexecdir'))
|
||||
installed_test_bindir = join_paths(gtk_libexecdir, 'installed-tests', 'gtk+')
|
||||
installed_test_datadir = join_paths(gtk_datadir, 'installed-tests', 'gtk+')
|
||||
|
||||
+if meson.version().version_compare('>=0.57.0')
|
||||
+ add_test_setup('default',
|
||||
+ is_default: true,
|
||||
+ exclude_suites: ['flaky', 'failing'],
|
||||
+ )
|
||||
+endif
|
||||
+
|
||||
subdir('gtk')
|
||||
subdir('gdk')
|
||||
subdir('css')
|
||||
diff --git a/testsuite/reftests/meson.build b/testsuite/reftests/meson.build
|
||||
index 2135ebb..8c3c3e8 100644
|
||||
--- a/testsuite/reftests/meson.build
|
||||
+++ b/testsuite/reftests/meson.build
|
||||
@@ -419,7 +419,7 @@ test_data = [
|
||||
]
|
||||
|
||||
# Depending on the environment these fail, feel free to fix them
|
||||
-somehow_broken = [
|
||||
+flaky = [
|
||||
'button-wrapping.ui',
|
||||
'cellrenderer-pixbuf-stock-rtl.ui',
|
||||
'label-sizing.ui',
|
||||
@@ -428,9 +428,21 @@ somehow_broken = [
|
||||
'symbolic-icon-translucent-color.ui',
|
||||
'window-height-for-width.ui',
|
||||
]
|
||||
+xfail = [
|
||||
+]
|
||||
|
||||
foreach testname : test_data
|
||||
if testname.endswith('.ui') and not testname.endswith('.ref.ui')
|
||||
+ suites = ['reftest']
|
||||
+
|
||||
+ if flaky.contains(testname)
|
||||
+ suites += 'flaky'
|
||||
+ endif
|
||||
+
|
||||
+ if xfail.contains(testname)
|
||||
+ suites += 'failing'
|
||||
+ endif
|
||||
+
|
||||
# reftests fail when multiple windows open at the same time stealing the focus,
|
||||
# so set is_parallel to false
|
||||
test('reftest ' + testname, gtk_reftest,
|
||||
@@ -450,8 +462,7 @@ foreach testname : test_data
|
||||
'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
|
||||
'REFTEST_MODULE_DIR=@0@'.format(meson.current_build_dir()),
|
||||
],
|
||||
- suite: 'reftest',
|
||||
- should_fail: somehow_broken.contains(testname),
|
||||
+ suite: suites,
|
||||
is_parallel: false)
|
||||
endif
|
||||
endforeach
|
||||
93
debian/patches/testsuite-Don-t-create-.test-files-for-flaky-or-failing-t.patch
vendored
Normal file
93
debian/patches/testsuite-Don-t-create-.test-files-for-flaky-or-failing-t.patch
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
From: Simon McVittie <smcv@debian.org>
|
||||
Date: Wed, 23 Nov 2022 21:26:50 +0000
|
||||
Subject: testsuite: Don't create .test files for flaky or failing tests
|
||||
|
||||
These tests can be run manually, but are not suitable for use as an
|
||||
acceptance test, so let's not make frameworks like Debian's autopkgtest
|
||||
run these when they run ginsttest-runner in the most obvious way.
|
||||
|
||||
a11ytests.test doesn't seem to be reliable enough to be used as a QA
|
||||
acceptance criterion, and has been disabled as a build-time test in both
|
||||
Gitlab-CI and Debian since 2019. a11ystate.test is not set up to be run
|
||||
at build time at all, and has been marked as flaky on ci.debian.net
|
||||
since 2018.
|
||||
|
||||
The remaining tests in the testsuite/a11y directory seem to have been
|
||||
reliable in practice, at least on ci.debian.net, so try leaving them
|
||||
enabled as installed-tests.
|
||||
|
||||
In principle this could be made finer-grained by having a separate .test
|
||||
file and a separate Meson test() for each .ui file, but that would
|
||||
require more active maintenance of GTK 3.
|
||||
|
||||
Signed-off-by: Simon McVittie <smcv@debian.org>
|
||||
---
|
||||
testsuite/a11y/meson.build | 7 ++++++-
|
||||
testsuite/a11y/state/meson.build | 7 +++++--
|
||||
testsuite/gtk/meson.build | 7 ++++++-
|
||||
3 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/testsuite/a11y/meson.build b/testsuite/a11y/meson.build
|
||||
index ea6348c..2613e3c 100644
|
||||
--- a/testsuite/a11y/meson.build
|
||||
+++ b/testsuite/a11y/meson.build
|
||||
@@ -181,7 +181,12 @@ installed_test_data = [
|
||||
]
|
||||
|
||||
a11y_installed_tests = [
|
||||
- 'a11ytests.test',
|
||||
+ # This is the equivalent of a11y_state_tests above, and does not seem
|
||||
+ # to be reliable enough to act as a QA gate in practice. We install the
|
||||
+ # test executable and the data needed to run it, but don't hook it up
|
||||
+ # to ginsttest-runner.
|
||||
+ #'a11ytests.test',
|
||||
+
|
||||
'a11ychildren.test',
|
||||
'a11ytree.test',
|
||||
'a11yvalue.test',
|
||||
diff --git a/testsuite/a11y/state/meson.build b/testsuite/a11y/state/meson.build
|
||||
index 782649e..aa18a84 100644
|
||||
--- a/testsuite/a11y/state/meson.build
|
||||
+++ b/testsuite/a11y/state/meson.build
|
||||
@@ -16,7 +16,10 @@ test_data = [
|
||||
|
||||
|
||||
a11y_installed_tests = [
|
||||
- 'a11ystate.test',
|
||||
+ # This is not run at build time at all, and consistently fails on
|
||||
+ # Debian's CI infrastructure, so don't set it up to be run by
|
||||
+ # ginsttest-runner.
|
||||
+ #'a11ystate.test',
|
||||
]
|
||||
|
||||
if get_option('installed_tests')
|
||||
@@ -32,4 +35,4 @@ if get_option('installed_tests')
|
||||
endforeach
|
||||
|
||||
install_data(test_data, install_dir: join_paths(installed_test_bindir, 'state'))
|
||||
-endif
|
||||
\ No newline at end of file
|
||||
+endif
|
||||
diff --git a/testsuite/gtk/meson.build b/testsuite/gtk/meson.build
|
||||
index 137f93a..84a8cfc 100644
|
||||
--- a/testsuite/gtk/meson.build
|
||||
+++ b/testsuite/gtk/meson.build
|
||||
@@ -146,6 +146,11 @@ endif
|
||||
if get_option('installed_tests')
|
||||
foreach t : tests
|
||||
test_name = t.get(0)
|
||||
+
|
||||
+ if flaky.contains(test_name) or xfail.contains(test_name)
|
||||
+ continue
|
||||
+ endif
|
||||
+
|
||||
conf = configuration_data()
|
||||
conf.set('testexecdir', installed_test_bindir)
|
||||
conf.set('test', test_name)
|
||||
@@ -158,4 +163,4 @@ if get_option('installed_tests')
|
||||
install_subdir('icons', install_dir: installed_test_bindir)
|
||||
install_subdir('icons2', install_dir: installed_test_bindir)
|
||||
install_subdir('ui', install_dir: installed_test_bindir)
|
||||
-endif
|
||||
\ No newline at end of file
|
||||
+endif
|
||||
29
debian/patches/testsuite-Try-enabling-a11y-tests-other-than-those-known-.patch
vendored
Normal file
29
debian/patches/testsuite-Try-enabling-a11y-tests-other-than-those-known-.patch
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
From: Simon McVittie <smcv@debian.org>
|
||||
Date: Thu, 24 Nov 2022 12:09:12 +0000
|
||||
Subject: testsuite: Try enabling a11y tests,
|
||||
other than those known to be unstable
|
||||
|
||||
At least some of the tests implemented via the accessibility-dump
|
||||
executable are known to be unstable, but the tests based on separate
|
||||
executables (tree-performance.c, etc.) have been reasonably consistently
|
||||
passing on ci.debian.net for several years, so hopefully they are also
|
||||
reliable enough for upstream CI and we don't need to mark them as flaky?
|
||||
|
||||
Signed-off-by: Simon McVittie <smcv@debian.org>
|
||||
---
|
||||
testsuite/a11y/meson.build | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/testsuite/a11y/meson.build b/testsuite/a11y/meson.build
|
||||
index 85d0f5d..ea6348c 100644
|
||||
--- a/testsuite/a11y/meson.build
|
||||
+++ b/testsuite/a11y/meson.build
|
||||
@@ -100,7 +100,7 @@ foreach t: a11y_tests
|
||||
'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
|
||||
'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
|
||||
],
|
||||
- suite: ['a11y', 'flaky'])
|
||||
+ suite: ['a11y'])
|
||||
endforeach
|
||||
|
||||
installed_test_data = [
|
||||
1
debian/run-tests.sh
vendored
1
debian/run-tests.sh
vendored
@ -45,7 +45,6 @@ for BACKEND in x11; do
|
||||
dbus-run-session -- \
|
||||
xvfb-run -a \
|
||||
dh_auto_test --builddirectory="$BUILDDIR" -- \
|
||||
--no-suite=gtk+-3.0:a11y \
|
||||
"$@" \
|
||||
|| touch "$test_data/tests-failed"
|
||||
|
||||
|
||||
4
debian/tests/control
vendored
4
debian/tests/control
vendored
@ -5,7 +5,3 @@ Restrictions: allow-stderr, superficial
|
||||
Tests: installed-tests
|
||||
Depends: at-spi2-core, dbus-daemon, gnome-desktop-testing (>= 2018.1-1~), gtk-3-examples, librsvg2-common, xauth, xvfb
|
||||
Restrictions: allow-stderr
|
||||
|
||||
Tests: installed-tests-a11ystate installed-tests-reftests
|
||||
Depends: at-spi2-core, dbus-daemon, gnome-desktop-testing (>= 2018.1-1~), gtk-3-examples, librsvg2-common, xauth, xvfb
|
||||
Restrictions: allow-stderr, flaky
|
||||
|
||||
19
debian/tests/installed-tests-a11ystate
vendored
19
debian/tests/installed-tests-a11ystate
vendored
@ -1,19 +0,0 @@
|
||||
#!/bin/sh
|
||||
# autopkgtest check: Run the installed-tests to verify GTK works correctly
|
||||
# Based on glib2.0's d/tests/installed-tests, (C) 2013 Canonical Ltd.
|
||||
|
||||
set -e
|
||||
|
||||
# Disable gvfs if it happens to be installed. We want to test the built-in
|
||||
# stuff
|
||||
export GIO_USE_VFS=local
|
||||
export GIO_USE_VOLUME_MONITOR=unix
|
||||
|
||||
export XDG_RUNTIME_DIR="$AUTOPKGTEST_TMP"
|
||||
|
||||
exec dbus-run-session -- \
|
||||
xvfb-run -a -s "-screen 0 1024x768x24" \
|
||||
gnome-desktop-testing-runner \
|
||||
--report-directory="$AUTOPKGTEST_ARTIFACTS" \
|
||||
--tap \
|
||||
"gtk+/a11ystate.test"
|
||||
19
debian/tests/installed-tests-reftests
vendored
19
debian/tests/installed-tests-reftests
vendored
@ -1,19 +0,0 @@
|
||||
#!/bin/sh
|
||||
# autopkgtest check: Run the installed-tests to verify GTK works correctly
|
||||
# Based on glib2.0's d/tests/installed-tests, (C) 2013 Canonical Ltd.
|
||||
|
||||
set -e
|
||||
|
||||
# Disable gvfs if it happens to be installed. We want to test the built-in
|
||||
# stuff
|
||||
export GIO_USE_VFS=local
|
||||
export GIO_USE_VOLUME_MONITOR=unix
|
||||
|
||||
export XDG_RUNTIME_DIR="$AUTOPKGTEST_TMP"
|
||||
|
||||
exec dbus-run-session -- \
|
||||
xvfb-run -a -s "-screen 0 1024x768x24" \
|
||||
gnome-desktop-testing-runner \
|
||||
--report-directory="$AUTOPKGTEST_ARTIFACTS" \
|
||||
--tap \
|
||||
"gtk+/reftests"
|
||||
Reference in New Issue
Block a user