Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Managed Iceberg] refactor integration tests; make it easy to add a new catalog to test suite #33444

Merged
merged 8 commits into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 7
"modification": 1
}
2 changes: 1 addition & 1 deletion .github/workflows/IO_Iceberg_Integration_Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ jobs:
- name: Run IcebergIO Integration Test
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:iceberg:catalogTests --info
gradle-command: :sdks:java:io:iceberg:integrationTest --info
22 changes: 15 additions & 7 deletions sdks/java/io/iceberg/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def iceberg_version = "1.6.1"
def parquet_version = "1.12.0"
def orc_version = "1.9.2"
def hive_version = "3.1.3"

dependencies {
implementation library.java.vendored_guava_32_1_2_jre
Expand All @@ -66,6 +67,18 @@ dependencies {
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(":sdks:java:extensions:google-cloud-platform-core")
testImplementation library.java.junit

// Hive catalog test dependencies
testImplementation project(path: ":sdks:java:io:iceberg:hive")
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation ("org.apache.hive:hive-metastore:$hive_version")
testImplementation "org.assertj:assertj-core:3.11.1"
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly project(path: ":runners:google-cloud-dataflow-java")
Expand Down Expand Up @@ -106,10 +119,10 @@ hadoopVersions.each { kv ->
}
}

task integrationTest(type: Test) {
task catalogTests(type: Test) {
group = "Verification"
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://managed-iceberg-integration-tests'
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--project=${gcpProject}",
"--tempLocation=${gcpTempLocation}",
Expand All @@ -125,11 +138,6 @@ task integrationTest(type: Test) {
testClassesDirs = sourceSets.test.output.classesDirs
}

tasks.register('catalogTests') {
dependsOn integrationTest
dependsOn ":sdks:java:io:iceberg:hive:integrationTest"
}

task loadTest(type: Test) {
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/temp-lt'
Expand Down
48 changes: 2 additions & 46 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def avatica_version = "1.25.0"
dependencies {
// dependencies needed to run with iceberg's hive catalog
// these dependencies are going to be included in io-expansion-service
implementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
permitUnusedDeclared ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
// analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely the plugin does not recognize "core" classifier
// use "core" classifier to depend on un-shaded jar
runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") {
Expand All @@ -51,53 +50,10 @@ dependencies {
runtimeOnly ("org.apache.hadoop:hadoop-yarn-server-resourcemanager:$hadoop_version")
runtimeOnly ("org.apache.hbase:hbase-client:$hbase_version")
runtimeOnly ("org.apache.calcite.avatica:avatica-core:$avatica_version")
implementation ("org.apache.hive:hive-metastore:$hive_version")
runtimeOnly ("org.apache.iceberg:iceberg-parquet:$iceberg_version")
permitUnusedDeclared ("org.apache.hive:hive-metastore:$hive_version")

// ----- below dependencies are for testing and will not appear in the shaded jar -----
// Beam IcebergIO dependencies
testImplementation project(path: ":sdks:java:core", configuration: "shadow")
testImplementation project(":sdks:java:managed")
testImplementation project(":sdks:java:io:iceberg")
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly library.java.snake_yaml
testRuntimeOnly library.java.bigdataoss_gcs_connector
testRuntimeOnly library.java.hadoop_client

// needed to set up the test environment
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation "org.apache.iceberg:iceberg-core:$iceberg_version"
testImplementation "org.assertj:assertj-core:3.11.1"
testImplementation library.java.junit

// needed to set up test Hive Metastore and run tests
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}
testImplementation "org.apache.iceberg:iceberg-parquet:$iceberg_version"
testImplementation "org.apache.parquet:parquet-column:1.12.0"
runtimeOnly ("org.apache.hive:hive-metastore:$hive_version")
}

configurations.all {
// the fatjar "parquet-hadoop-bundle" conflicts with "parquet-hadoop" used by org.apache.iceberg:iceberg-parquet
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

task integrationTest(type: Test) {
group = "Verification"
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it'
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--tempLocation=${gcpTempLocation}",
])

// Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
outputs.upToDateWhen { false }

include '**/*IT.class'

maxParallelForks 4
classpath = sourceSets.test.runtimeClasspath
testClassesDirs = sourceSets.test.output.classesDirs
}
Loading
Loading