diff --git a/src/helm/benchmark/metrics/image_generation/q16/test_q16.py b/src/helm/benchmark/metrics/image_generation/q16/test_q16.py index e2b7f5e9e4..153374972d 100644 --- a/src/helm/benchmark/metrics/image_generation/q16/test_q16.py +++ b/src/helm/benchmark/metrics/image_generation/q16/test_q16.py @@ -1,8 +1,10 @@ import os +import pytest from helm.benchmark.metrics.image_generation.q16.q16_toxicity_detector import Q16ToxicityDetector +@pytest.mark.skip(reason="Skipping due to flakiness.") class TestQ16: def setup_method(self, method): self._q16_detector = Q16ToxicityDetector()