[SPARK] Add test for Identity Column merge metadata conflict (#3971)

#### Which Delta project/connector is this regarding?  - [x] Spark - [ ] Standalone - [ ] Flink - [ ] Kernel - [ ] Other (fill in here) ## Description Adds a test for identity columns verifying that a MERGE is aborted if the high watermark changes after analysis but before execution.  ## How was this patch tested? Test-only.  ## Does this PR introduce _any_ user-facing changes? No.
delta-io · Dec 12, 2024 · fdf887d · fdf887d
1 parent 58f94af
commit fdf887d
Showing 1 changed file with 24 additions and 0 deletions.
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/IdentityColumnConflictSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/IdentityColumnConflictSuite.scala
@@ -276,6 +276,30 @@ trait IdentityColumnConflictSuiteBase
       tblIsoLevel = Some(Serializable)
     )
   }
+
+  test("high watermark changes after analysis but before execution of merge") {
+    val tblName = getRandomTableName
+    withIdentityColumnTable(GeneratedAsIdentityType.GeneratedAlways, tblName) {
+      // Parse the MERGE and wrap it in a QueryExecution. Accessing `analyzed` forces analysis
+      // only; the command itself is not executed yet.
+      val parsedMerge = spark.sessionState.sqlParser.parsePlan(
+        s"""MERGE INTO $tblName t
+           |USING (SELECT * FROM range(1000)) s
+           |ON t.id = s.id
+           |WHEN NOT MATCHED THEN INSERT (value) VALUES (s.id)""".stripMargin)
+      val qeMerge = new QueryExecution(spark, parsedMerge)
+      qeMerge.analyzed
+
+      // Insert a row between analysis and execution so the table's high watermark is updated.
+      sql(s"INSERT INTO $tblName (value) VALUES (0)")
+
+      // Now force the MERGE to execute. It must fail: its insert action still uses the old
+      // high watermark from before the INSERT above, which is now stale.
+      intercept[MetadataChangedException] {
+        qeMerge.commandExecuted
+      }
+    }
+  }
 }
 
 class IdentityColumnConflictScalaSuite