testsuite: Avoid using should_fail

There are two possible interpretations of "expected failure": either the test *must* fail (exactly the inverse of an ordinary test, with success becoming failure and failure becoming success), or the test *may* fail (with success intended, but failure possible in some environments). Autotools had the second interpretation, which seems more useful in practice, but Meson has the first.

In GTK 3.24.35, if the environment is such that the label-sizing.ui reftest happens to be successful, the overall result of the test suite is failure. This seems unlikely to have been the intention.

Instead of using should_fail, put the tests in one of two new suites: "flaky" is intended for tests that succeed or fail unpredictably according to the test environment or chance, while "failing" is for tests that ought to succeed but currently never do as a result of a bug or missing functionality.

With a sufficiently new version of Meson, the flaky and failing tests are not run by default, but can be requested with a command like:

    meson test --setup=unstable_tests --suite=flaky --suite=failing

This arrangement is inspired by GNOME/glib!2987, which was contributed by Marco Trevisan.

Signed-off-by: Simon McVittie <smcv@debian.org>
2022-11-23 18:36:08 +00:00 · 2022-11-23 18:36:08 +00:00 · 820d72ffb8
commit 820d72ffb8
parent 4bdfb11d1f
5 changed files with 50 additions and 10 deletions
--- a/.gitlab-ci/test-docker-meson.sh
+++ b/.gitlab-ci/test-docker-meson.sh
@ -22,9 +22,22 @@ meson \
 cd _build
 ninja
+# Meson < 0.57 can't exclude suites in a test_setup() so we have to
+# explicitly leave out the failing and flaky suites.
 xvfb-run -a -s "-screen 0 1024x768x24" \
    meson test \
        --timeout-multiplier 4 \
        --print-errorlogs \
        --suite=gtk+-3.0 \
-        --no-suite=gtk+-3.0:a11y
+        --no-suite=flaky \
+        --no-suite=failing
+# We run the flaky and failing tests to get them reported in the CI logs,
+# but if they fail (which we expect they often will), that isn't an error.
+xvfb-run -a -s "-screen 0 1024x768x24" \
+    meson test \
+        --timeout-multiplier 4 \
+        --print-errorlogs \
+        --suite=flaky \
+        --suite=failing \
+    || true
--- a/testsuite/a11y/meson.build
+++ b/testsuite/a11y/meson.build
@ -66,7 +66,7 @@ foreach t: a11y_state_tests
        'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
        'GTK_TEST_MESON=1',
      ],
-      suite: 'a11y',
+      suite: ['a11y', 'flaky'],
    )
  endif
 endforeach
@ -100,7 +100,7 @@ foreach t: a11y_tests
               'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
               'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
             ],
-        suite: 'a11y')
+        suite: ['a11y', 'flaky'])
 endforeach
 installed_test_data = [
--- a/testsuite/gtk/meson.build
+++ b/testsuite/gtk/meson.build
@ -50,7 +50,9 @@ tests = [
  ['revealer-size'],
 ]
-# Tests that are expected to fail
+# Tests that are expected to fail, sometimes or always
+flaky = [
+]
 xfail = [
 ]
@ -83,7 +85,15 @@ foreach t : tests
    install: get_option('installed_tests'),
    install_dir: installed_test_bindir)
-  expect_fail = xfail.contains(test_name)
+  suites = ['gtk']
+  if flaky.contains(test_name)
+    suites += 'flaky'
+  endif
+  if xfail.contains(test_name)
+    suites += 'failing'
+  endif
  test(test_name, test_exe,
       args: [ '--tap', '-k' ],
@ -97,8 +107,7 @@ foreach t : tests
              'GSETTINGS_SCHEMA_DIR=@0@'.format(gtk_schema_build_dir),
              'GTK_TEST_MESON=1',
            ],
-       suite: 'gtk',
+       suite: suites,
-       should_fail: expect_fail,
  )
 endforeach
--- a/testsuite/meson.build
+++ b/testsuite/meson.build
@ -2,6 +2,13 @@ gtk_libexecdir = join_paths(gtk_prefix, get_option('libexecdir'))
 installed_test_bindir = join_paths(gtk_libexecdir, 'installed-tests', 'gtk+')
 installed_test_datadir = join_paths(gtk_datadir, 'installed-tests', 'gtk+')
+if meson.version().version_compare('>=0.57.0')
+  add_test_setup('default',
+    is_default: true,
+    exclude_suites: ['flaky', 'failing'],
+  )
+endif
 subdir('gtk')
 subdir('gdk')
 subdir('css')
--- a/testsuite/reftests/meson.build
+++ b/testsuite/reftests/meson.build
@ -419,7 +419,7 @@ test_data = [
 ]
 # Depending on the environment these fail, feel free to fix them
-somehow_broken = [
+flaky = [
  'button-wrapping.ui',
  'cellrenderer-pixbuf-stock-rtl.ui',
  'label-sizing.ui',
@ -428,9 +428,21 @@ somehow_broken = [
  'symbolic-icon-translucent-color.ui',
  'window-height-for-width.ui',
 ]
+xfail = [
+]
 foreach testname : test_data
  if testname.endswith('.ui') and not testname.endswith('.ref.ui')
+    suites = ['reftest']
+    if flaky.contains(testname)
+      suites += 'flaky'
+    endif
+    if xfail.contains(testname)
+      suites += 'failing'
+    endif
    # reftests fail when multiple windows open at the same time stealing the focus,
    # so set is_parallel to false
    test('reftest ' + testname, gtk_reftest,
@ -450,8 +462,7 @@ foreach testname : test_data
                'G_TEST_BUILDDIR=@0@'.format(meson.current_build_dir()),
                'REFTEST_MODULE_DIR=@0@'.format(meson.current_build_dir()),
              ],
-         suite: 'reftest',
+         suite: suites,
-         should_fail: somehow_broken.contains(testname),
         is_parallel: false)
  endif
 endforeach