Run system tests in parallel (#1909)

Adapt the code to be able to run tests in parallel. Currently, running tests in parallel is only supported by the system test runner. The required configuration files have been added to some of the test packages in order to exercise this parallelization. In addition, log verbosity has been reduced for some of the most frequently repeated messages about container status and agent data.
elastic · Jun 19, 2024 · 700dd69 · 700dd69
1 parent 49e1c04
commit 700dd69
Show file tree

Hide file tree

Showing 25 changed files with 522 additions and 111 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -1,6 +1,7 @@
 env:
   SETUP_GVM_VERSION: 'v0.5.2' # https://github.com/andrewkroh/gvm/issues/44#issuecomment-1013231151
   ELASTIC_PACKAGE_COMPOSE_DISABLE_VERBOSE_OUTPUT: "true"
+  ELASTIC_PACKAGE_MAXIMUM_NUMBER_PARALLEL_TESTS: 3
   DOCKER_COMPOSE_VERSION: "v2.24.1"
   DOCKER_VERSION: "26.1.2"
   KIND_VERSION: 'v0.20.0'

diff --git a/cmd/testrunner.go b/cmd/testrunner.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
+	"sort"
 	"strings"
 
 	"github.com/spf13/cobra"
@@ -165,9 +166,15 @@ func testRunnerAssetCommandAction(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("can't create Kibana client: %w", err)
 	}
 
+	globalTestConfig, err := testrunner.ReadGlobalTestConfig(packageRootPath)
+	if err != nil {
+		return fmt.Errorf("failed to read global config: %w", err)
+	}
+
 	runner := asset.NewAssetTestRunner(asset.AssetTestRunnerOptions{
-		PackageRootPath: packageRootPath,
-		KibanaClient:    kibanaClient,
+		PackageRootPath:  packageRootPath,
+		KibanaClient:     kibanaClient,
+		GlobalTestConfig: globalTestConfig.Asset,
 	})
 
 	results, err := testrunner.RunSuite(ctx, runner)
@@ -247,10 +254,16 @@ func testRunnerStaticCommandAction(cmd *cobra.Command, args []string) error {
 	ctx, stop := signal.Enable(cmd.Context(), logger.Info)
 	defer stop()
 
+	globalTestConfig, err := testrunner.ReadGlobalTestConfig(packageRootPath)
+	if err != nil {
+		return fmt.Errorf("failed to read global config: %w", err)
+	}
+
 	runner := static.NewStaticTestRunner(static.StaticTestRunnerOptions{
 		PackageRootPath:    packageRootPath,
 		DataStreams:        dataStreams,
 		FailOnMissingTests: failOnMissing,
+		GlobalTestConfig:   globalTestConfig.Static,
 	})
 
 	results, err := testrunner.RunSuite(ctx, runner)
@@ -355,6 +368,11 @@ func testRunnerPipelineCommandAction(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("reading package manifest failed (path: %s): %w", packageRootPath, err)
 	}
 
+	globalTestConfig, err := testrunner.ReadGlobalTestConfig(packageRootPath)
+	if err != nil {
+		return fmt.Errorf("failed to read global config: %w", err)
+	}
+
 	runner := pipeline.NewPipelineTestRunner(pipeline.PipelineTestRunnerOptions{
 		Profile:            profile,
 		PackageRootPath:    packageRootPath,
@@ -365,6 +383,7 @@ func testRunnerPipelineCommandAction(cmd *cobra.Command, args []string) error {
 		WithCoverage:       testCoverage,
 		CoverageType:       testCoverageFormat,
 		DeferCleanup:       deferCleanup,
+		GlobalTestConfig:   globalTestConfig.Pipeline,
 	})
 
 	results, err := testrunner.RunSuite(ctx, runner)
@@ -532,6 +551,11 @@ func testRunnerSystemCommandAction(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("reading package manifest failed (path: %s): %w", packageRootPath, err)
 	}
 
+	globalTestConfig, err := testrunner.ReadGlobalTestConfig(packageRootPath)
+	if err != nil {
+		return fmt.Errorf("failed to read global config: %w", err)
+	}
+
 	runner := system.NewSystemTestRunner(system.SystemTestRunnerOptions{
 		Profile:                    profile,
 		PackageRootPath:            packageRootPath,
@@ -547,6 +571,7 @@ func testRunnerSystemCommandAction(cmd *cobra.Command, args []string) error {
 		GenerateTestResult:         generateTestResult,
 		DeferCleanup:               deferCleanup,
 		RunIndependentElasticAgent: false,
+		GlobalTestConfig:           globalTestConfig.System,
 	})
 
 	logger.Debugf("Running suite...")
@@ -646,12 +671,18 @@ func testRunnerPolicyCommandAction(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("reading package manifest failed (path: %s): %w", packageRootPath, err)
 	}
 
+	globalTestConfig, err := testrunner.ReadGlobalTestConfig(packageRootPath)
+	if err != nil {
+		return fmt.Errorf("failed to read global config: %w", err)
+	}
+
 	runner := policy.NewPolicyTestRunner(policy.PolicyTestRunnerOptions{
 		PackageRootPath:    packageRootPath,
 		KibanaClient:       kibanaClient,
 		DataStreams:        dataStreams,
 		FailOnMissingTests: failOnMissing,
 		GenerateTestResult: generateTestResult,
+		GlobalTestConfig:   globalTestConfig.Policy,
 	})
 
 	results, err := testrunner.RunSuite(ctx, runner)
@@ -663,6 +694,18 @@ func testRunnerPolicyCommandAction(cmd *cobra.Command, args []string) error {
 }
 
 func processResults(results []testrunner.TestResult, testType testrunner.TestType, reportFormat, reportOutput, packageRootPath, packageName, packageType, testCoverageFormat string, testCoverage bool) error {
+	sort.Slice(results, func(i, j int) bool {
+		if results[i].Package != results[j].Package {
+			return results[i].Package < results[j].Package
+		}
+		if results[i].TestType != results[j].TestType {
+			return results[i].TestType < results[j].TestType
+		}
+		if results[i].DataStream != results[j].DataStream {
+			return results[i].DataStream < results[j].DataStream
+		}
+		return results[i].Name < results[j].Name
+	})
 	format := testrunner.TestReportFormat(reportFormat)
 	report, err := testrunner.FormatReport(format, results)
 	if err != nil {

diff --git a/docs/howto/asset_testing.md b/docs/howto/asset_testing.md
@@ -49,3 +49,14 @@ Finally, when you are done running all asset loading tests, bring down the Elast
 ```
 elastic-package stack down
 ```
+
+## Global test configuration
+
+Each package can define a configuration file in `_dev/test/config.yml` to skip all the asset tests.
+
+```yaml
+asset:
+  skip:
+    reason: <reason>
+    link: <link_to_issue>
+```
diff --git a/docs/howto/pipeline_testing.md b/docs/howto/pipeline_testing.md
@@ -175,3 +175,14 @@ Finally, when you are done running all pipeline tests, bring down the Elastic St
 ```
 elastic-package stack down
 ```
+
+## Global test configuration
+
+Each package can define a configuration file in `_dev/test/config.yml` to skip all the pipeline tests.
+
+```yaml
+pipeline:
+  skip:
+    reason: <reason>
+    link: <link_to_issue>
+```
diff --git a/docs/howto/policy_testing.md b/docs/howto/policy_testing.md
@@ -41,6 +41,17 @@ It is possible, and encouraged, to define multiple policy tests for each package
 or data stream.
 
 
+## Global test configuration
+
+Each package can define a configuration file in `_dev/test/config.yml` to skip all the policy tests.
+
+```yaml
+policy:
+  skip:
+    reason: <reason>
+    link: <link_to_issue>
+```
+
 ### Defining the configuration of the policy
 
 Test configuration for the policy is defined in a YAML file prefixed with

diff --git a/docs/howto/static_testing.md b/docs/howto/static_testing.md
@@ -26,3 +26,14 @@ If you want to run pipeline tests for **specific data streams** in a package, na
 ```
 elastic-package test static --data-streams <data stream 1>[,<data stream 2>,...]
 ```
+
+## Global test configuration
+
+Each package can define a configuration file in `_dev/test/config.yml` to skip all the static tests.
+
+```yaml
+static:
+  skip:
+    reason: <reason>
+    link: <link_to_issue>
+```
diff --git a/docs/howto/system_testing.md b/docs/howto/system_testing.md
@@ -553,6 +553,20 @@ Placeholders used in the `test-<test_name>-config.yml` must be enclosed in `{{{`
 
 **NOTE**: Terraform variables in the form of environment variables (prefixed with `TF_VAR_`) are not injected and cannot be used as placeholder (their value will always be empty).
 
+## Global test configuration
+
+Each package can define a configuration file in `_dev/test/config.yml` that allows you to:
+- skip all the system tests defined in the package.
+- set whether these system tests should run in parallel.
+
+```yaml
+system:
+  parallel: true
+  skip:
+    reason: <reason>
+    link: <link_to_issue>
+```
+
 ## Running a system test
 
 Once the two levels of configurations are defined as described in the previous section, you are ready to run system tests for a package's data streams.
@@ -761,11 +775,36 @@ Considerations for this mode of running Elastic Agents:
     - Create a new `_dev/deploy/docker` adding the service container if needed.
     - Define the settings required for your Elastic Agents in all the test configuration files.
 
+#### Running system tests in parallel (technical preview)
+
+By default, `elastic-package` runs every system test defined in the package sequentially.
+This can be changed so that tests run in parallel. To do so, the following is required:
+- tests must be run using independent Elastic Agents (see [this section](#running-system-tests-with-independent-elastic-agents-in-each-test-technical-preview)).
+- the package must define the global test configuration file with these contents to enable system test parallelization:
+  ```yaml
+  system:
+    parallel: true
+  ```
+- define how many tests should run in parallel
+    - This is done by defining the environment variable `ELASTIC_PACKAGE_MAXIMUM_NUMBER_PARALLEL_TESTS`
+
+
+Given those requirements, this is an example to run system tests in parallel:
+```shell
+ELASTIC_PACKAGE_MAXIMUM_NUMBER_PARALLEL_TESTS=5 \
+  ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=true \
+  elastic-package test system -v
+```
+
+**NOTE**:
+- Currently, only system tests can be run in parallel.
+- It is **not recommended** to enable parallel system tests for packages that make use of the Terraform or Kubernetes service deployers.
+
 ### Detecting ignored fields
 
 As part of the system test, `elastic-package` checks whether any documents couldn't successfully map any fields. Common issues are the configured field limit being exceeded or keyword fields receiving values longer than `ignore_above`. You can learn more in the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-ignored-field.html).
 
-In this case, `elastic-package test system` will fail with an error and print a sample of affected documents. To fix the issue, check which fields got ignored and the `ignored_field_values` and either adapt the mapping or the ingest pipeline to accomodate for the problematic values. In case an ignored field can't be meaningfully mitigated, it's possible to skip the check by listing the field under the `skip_ignored_fields` property in the system test config of the data stream:
+In this case, `elastic-package test system` will fail with an error and print a sample of affected documents. To fix the issue, check which fields got ignored and the `ignored_field_values` and either adapt the mapping or the ingest pipeline to accommodate the problematic values. In case an ignored field can't be meaningfully mitigated, it's possible to skip the check by listing the field under the `skip_ignored_fields` property in the system test config of the data stream:
 ```
 # data_stream/<data stream name>/_dev/test/system/test-default-config.yml
 skip_ignored_fields:

diff --git a/internal/compose/compose.go b/internal/compose/compose.go
@@ -370,29 +370,33 @@ func (p *Project) WaitForHealthy(ctx context.Context, opts CommandOptions) error
 		}
 
 		for _, containerDescription := range descriptions {
-			logger.Debugf("Container status: %s", containerDescription.String())
 
 			// No healthcheck defined for service
 			if containerDescription.State.Status == "running" && containerDescription.State.Health == nil {
+				logger.Debugf("Container %s status: %s (no health status)", containerDescription.ID, containerDescription.State.Status)
 				continue
 			}
 
 			// Service is up and running and it's healthy
 			if containerDescription.State.Status == "running" && containerDescription.State.Health.Status == "healthy" {
+				logger.Debugf("Container %s status: %s (health: %s)", containerDescription.ID, containerDescription.State.Status, containerDescription.State.Health.Status)
 				continue
 			}
 
 			// Container started and finished with exit code 0
 			if containerDescription.State.Status == "exited" && containerDescription.State.ExitCode == 0 {
+				logger.Debugf("Container %s status: %s (exit code: %d)", containerDescription.ID, containerDescription.State.Status, containerDescription.State.ExitCode)
 				continue
 			}
 
 			// Container exited with code > 0
 			if containerDescription.State.Status == "exited" && containerDescription.State.ExitCode > 0 {
+				logger.Debugf("Container %s status: %s (exit code: %d)", containerDescription.ID, containerDescription.State.Status, containerDescription.State.ExitCode)
 				return fmt.Errorf("container (ID: %s) exited with code %d", containerDescription.ID, containerDescription.State.ExitCode)
 			}
 
 			// Any different status is considered unhealthy
+			logger.Debugf("Container %s status: unhealthy", containerDescription.ID)
 			healthy = false
 		}
 

diff --git a/internal/kibana/agents.go b/internal/kibana/agents.go
@@ -118,7 +118,8 @@ func (c *Client) waitUntilPolicyAssigned(ctx context.Context, a Agent, p Policy)
 		if err != nil {
 			return fmt.Errorf("can't get the agent: %w", err)
 		}
-		logger.Debugf("Agent data: %s", agent.String())
+		logger.Debugf("Agent %s (Host: %s): Policy ID %s LogLevel: %s Status: %s",
+			agent.ID, agent.LocalMetadata.Host.Name, agent.PolicyID, agent.LocalMetadata.Elastic.Agent.LogLevel, agent.Status)
 
 		if agent.PolicyID == p.ID && agent.PolicyRevision >= p.Revision {
 			logger.Debugf("Policy revision assigned to the agent (ID: %s)...", a.ID)

diff --git a/internal/testrunner/globaltestconfig.go b/internal/testrunner/globaltestconfig.go
@@ -0,0 +1,51 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+package testrunner
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/elastic/go-ucfg"
+	"github.com/elastic/go-ucfg/yaml"
+)
+
+type globalTestConfig struct {
+	Asset    GlobalRunnerTestConfig `config:"asset"`
+	Pipeline GlobalRunnerTestConfig `config:"pipeline"`
+	Policy   GlobalRunnerTestConfig `config:"policy"`
+	Static   GlobalRunnerTestConfig `config:"static"`
+	System   GlobalRunnerTestConfig `config:"system"`
+}
+
+type GlobalRunnerTestConfig struct {
+	Parallel        bool `config:"parallel"`
+	SkippableConfig `config:",inline"`
+}
+
+func ReadGlobalTestConfig(packageRootPath string) (*globalTestConfig, error) {
+	configFilePath := filepath.Join(packageRootPath, "_dev", "test", "config.yml")
+
+	data, err := os.ReadFile(configFilePath)
+	if errors.Is(err, os.ErrNotExist) {
+		return &globalTestConfig{}, nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("failed to read %s: %w", configFilePath, err)
+	}
+
+	var c globalTestConfig
+	cfg, err := yaml.NewConfig(data, ucfg.PathSep("."))
+	if err != nil {
+		return nil, fmt.Errorf("unable to load global test configuration file: %s: %w", configFilePath, err)
+	}
+	if err := cfg.Unpack(&c); err != nil {
+		return nil, fmt.Errorf("unable to unpack global test configuration file: %s: %w", configFilePath, err)
+	}
+
+	return &c, nil
+}
diff --git a/internal/testrunner/runners/asset/runner.go b/internal/testrunner/runners/asset/runner.go
@@ -17,19 +17,22 @@ const (
 )
 
 type runner struct {
-	packageRootPath string
-	kibanaClient    *kibana.Client
+	packageRootPath  string
+	kibanaClient     *kibana.Client
+	globalTestConfig testrunner.GlobalRunnerTestConfig
 }
 
 type AssetTestRunnerOptions struct {
-	PackageRootPath string
-	KibanaClient    *kibana.Client
+	PackageRootPath  string
+	KibanaClient     *kibana.Client
+	GlobalTestConfig testrunner.GlobalRunnerTestConfig
 }
 
 func NewAssetTestRunner(options AssetTestRunnerOptions) *runner {
 	runner := runner{
-		packageRootPath: options.PackageRootPath,
-		kibanaClient:    options.KibanaClient,
+		packageRootPath:  options.PackageRootPath,
+		kibanaClient:     options.KibanaClient,
+		globalTestConfig: options.GlobalTestConfig,
 	}
 	return &runner
 }
@@ -53,9 +56,10 @@ func (r *runner) TearDownRunner(ctx context.Context) error {
 func (r *runner) GetTests(ctx context.Context) ([]testrunner.Tester, error) {
 	testers := []testrunner.Tester{
 		NewAssetTester(AssetTesterOptions{
-			PackageRootPath: r.packageRootPath,
-			KibanaClient:    r.kibanaClient,
-			TestFolder:      testrunner.TestFolder{Package: r.packageRootPath},
+			PackageRootPath:  r.packageRootPath,
+			KibanaClient:     r.kibanaClient,
+			TestFolder:       testrunner.TestFolder{Package: r.packageRootPath},
+			GlobalTestConfig: r.globalTestConfig,
 		}),
 	}
 	return testers, nil