feat: named vector support
Anush008 committed Jan 23, 2024
1 parent 75c38fa commit f8f400d
Showing 3 changed files with 12 additions and 2 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -70,14 +70,15 @@ To load data into Qdrant, a collection has to be created beforehand with the app
- An API key can be set using the `api_key` option to make authenticated requests.

## Databricks

You can use the `qdrant-spark` connector as a library in Databricks to ingest data into Qdrant.

- Go to the `Libraries` section in your cluster dashboard.
- Select `Install New` to open the library installation modal.
- Search for `io.qdrant:spark:2.0` in the Maven packages and click `Install`.

<img width="1064" alt="Screenshot 2024-01-05 at 17 20 01 (1)" src="https://github.com/qdrant/qdrant-spark/assets/46051506/d95773e0-c5c6-4ff2-bf50-8055bb08fd1b">
## Datatype support 📋

Qdrant supports all the Spark data types, and the appropriate types are mapped based on the provided `schema`.
@@ -95,6 +96,7 @@ Qdrant supports all the Spark data types, and the appropriate types are mapped b
| `batch_size` | Max size of the upload batch. Default: 100 | `IntType` ||
| `retries` | Number of upload retries. Default: 3 | `IntType` ||
| `api_key` | Qdrant API key to be sent in the header. Default: null | `StringType` ||
| `vector_name` | Name of the vector in the collection. Default: null | `StringType` ||
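
The table above gains a `vector_name` option in this commit. A minimal sketch of a write-time options map exercising it — every key appears in this diff, but the values are placeholders, and the Spark format string in the comment is an assumption (check the repository README for the exact write call):

```java
import java.util.HashMap;
import java.util.Map;

// Illustrative connector options. Keys mirror QdrantOptions in this commit;
// values are placeholders, not a known-good setup. In a Spark job this map
// would be passed to the DataFrameWriter (assumed format "io.qdrant.spark.Qdrant").
public class WriteOptionsExample {
    public static Map<String, String> connectorOptions() {
        Map<String, String> opts = new HashMap<>();
        opts.put("collection_name", "demo-collection"); // placeholder collection
        opts.put("embedding_field", "embedding");       // column holding the float vectors
        opts.put("id_field", "id");                     // column used as the point ID
        opts.put("vector_name", "dense");               // new in this commit: named vector
        opts.put("api_key", "<QDRANT_API_KEY>");        // placeholder secret
        opts.put("batch_size", "64");                   // overrides the default of 100
        return opts;
    }

    public static void main(String[] args) {
        System.out.println(connectorOptions().get("vector_name")); // prints "dense"
    }
}
```

Leaving `vector_name` unset preserves the old behavior: embeddings go to the collection's default (unnamed) vector.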

## LICENSE 📜

8 changes: 7 additions & 1 deletion src/main/java/io/qdrant/spark/QdrantDataWriter.java
@@ -4,6 +4,8 @@
import static io.qdrant.client.ValueFactory.list;
import static io.qdrant.client.ValueFactory.nullValue;
import static io.qdrant.client.ValueFactory.value;
import static io.qdrant.client.VectorFactory.vector;
import static io.qdrant.client.VectorsFactory.namedVectors;
import static io.qdrant.client.VectorsFactory.vectors;

import io.qdrant.client.grpc.JsonWithInt.Struct;
@@ -76,7 +78,11 @@ public void write(InternalRow record) {

} else if (field.name().equals(this.options.embeddingField)) {
float[] embeddings = record.getArray(fieldIndex).toFloatArray();
pointBuilder.setVectors(vectors(embeddings));
if (options.vectorName != null) {
pointBuilder.setVectors(namedVectors(Map.of(options.vectorName, vector(embeddings))));
} else {
pointBuilder.setVectors(vectors(embeddings));
}
} else {
payload.put(field.name(), convertToJavaType(record, field, fieldIndex));
}
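
The branch added to `write()` above can be sketched in isolation. A plain-Java stand-in, with `Object` substituting for the gRPC `Vectors` message that `VectorsFactory` actually builds:

```java
import java.util.Map;

// Sketch of the new branch in QdrantDataWriter.write(): with no vector_name the
// embedding is stored as the collection's default (unnamed) vector; with a
// vector_name it is wrapped in a name -> vector map, as namedVectors() does.
public class VectorSelectionSketch {
    public static Object buildVectors(String vectorName, float[] embeddings) {
        if (vectorName != null) {
            // Named vector: Qdrant addresses it as {vectorName: embeddings}.
            return Map.of(vectorName, embeddings);
        }
        // Unnamed (default) vector.
        return embeddings;
    }

    public static void main(String[] args) {
        float[] emb = {0.1f, 0.2f};
        System.out.println(buildVectors(null, emb) instanceof float[]); // true
        System.out.println(buildVectors("dense", emb) instanceof Map);  // true
    }
}
```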
2 changes: 2 additions & 0 deletions src/main/java/io/qdrant/spark/QdrantOptions.java
@@ -10,6 +10,7 @@ public class QdrantOptions implements Serializable {
public String collectionName;
public String embeddingField;
public String idField;
public String vectorName;
public int batchSize = 100;
public int retries = 3;

@@ -24,6 +25,7 @@ public QdrantOptions(Map<String, String> options) {
this.embeddingField = options.get("embedding_field");
this.idField = options.get("id_field");
this.apiKey = options.get("api_key");
this.vectorName = options.get("vector_name");

if (options.containsKey("batch_size")) {
this.batchSize = Integer.parseInt(options.get("batch_size"));
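
The defaulting behavior in `QdrantOptions` can be sketched as follows — field names mirror the diff above, but the constructor body is illustrative, condensed from the hunks shown rather than copied from the full source:

```java
import java.util.HashMap;
import java.util.Map;

// Sketch of QdrantOptions defaulting: vector_name is null unless supplied,
// batch_size and retries fall back to 100 and 3 respectively.
public class OptionsSketch {
    public final String vectorName; // null => unnamed (default) vector
    public final int batchSize;     // upload batch size
    public final int retries;       // upload retry count

    public OptionsSketch(Map<String, String> options) {
        this.vectorName = options.get("vector_name"); // may legitimately be null
        this.batchSize = options.containsKey("batch_size")
                ? Integer.parseInt(options.get("batch_size")) : 100;
        this.retries = options.containsKey("retries")
                ? Integer.parseInt(options.get("retries")) : 3;
    }

    public static void main(String[] args) {
        OptionsSketch defaults = new OptionsSketch(new HashMap<>());
        System.out.println(defaults.batchSize + " " + defaults.retries); // 100 3

        Map<String, String> named = new HashMap<>();
        named.put("vector_name", "dense");
        named.put("batch_size", "50");
        OptionsSketch custom = new OptionsSketch(named);
        System.out.println(custom.vectorName + " " + custom.batchSize); // dense 50
    }
}
```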
