From 024db9b5214139fe65df6111cf5b82de6010d6d0 Mon Sep 17 00:00:00 2001 From: Keming Date: Wed, 31 Jan 2024 12:16:24 +0800 Subject: [PATCH] feat: support qdrant Signed-off-by: Keming --- README.md | 4 +- .../compose.pgvecto_rs.yaml | 4 +- .../compose.pgvector.yaml | 2 +- docker/compose.qdrant.yaml | 17 +++++ pyproject.toml | 1 + vector_bench/client/__init__.py | 2 + vector_bench/client/base.py | 2 +- vector_bench/client/qdrant.py | 64 +++++++++++++++++++ 8 files changed, 90 insertions(+), 6 deletions(-) rename server/pgvecto.rs/compose.yaml => docker/compose.pgvecto_rs.yaml (88%) rename server/pgvector/compose.yaml => docker/compose.pgvector.yaml (89%) create mode 100644 docker/compose.qdrant.yaml create mode 100644 vector_bench/client/qdrant.py diff --git a/README.md b/README.md index 5b61e56..b37dd3c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Supported databases/extensions: - [x] [`pgvecto.rs`](https://github.com/tensorchord/pgvecto.rs) - [x] [`pgvector`](https://github.com/pgvector/pgvector) -- [ ] [`qdrant`](https://github.com/qdrant/qdrant/) +- [x] [`qdrant`](https://github.com/qdrant/qdrant/) Supported datasets: @@ -24,7 +24,7 @@ pip install vector_bench Run the docker compose file under [`server`](server/) folder. ```base -cd server/pgvecto.rs && docker compose up -d +docker compose -f docker/compose.${DB_NAME}.yaml up -d ``` ### Client diff --git a/server/pgvecto.rs/compose.yaml b/docker/compose.pgvecto_rs.yaml similarity index 88% rename from server/pgvecto.rs/compose.yaml rename to docker/compose.pgvecto_rs.yaml index f1d7ee1..fef771c 100644 --- a/server/pgvecto.rs/compose.yaml +++ b/docker/compose.pgvecto_rs.yaml @@ -1,7 +1,7 @@ services: - pgvector: + pgvectors: image: tensorchord/pgvecto-rs:pg15-v0.1.13 - container_name: pgvector + container_name: pgvectors environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=password diff --git a/server/pgvector/compose.yaml b/docker/compose.pgvector.yaml similarity index 89% rename from server/pgvector/compose.yaml rename to docker/compose.pgvector.yaml index fd30a66..85d13d3 100644 --- a/server/pgvector/compose.yaml +++ b/docker/compose.pgvector.yaml @@ -1,6 +1,6 @@ services: pgvector: - image: ankane/pgvector:v0.5.1 + image: pgvector/pgvector:0.6.0-pg15 container_name: pgvector environment: - POSTGRES_USER=postgres diff --git a/docker/compose.qdrant.yaml b/docker/compose.qdrant.yaml new file mode 100644 index 0000000..d1260ec --- /dev/null +++ b/docker/compose.qdrant.yaml @@ -0,0 +1,17 @@ +services: + qdrant: + image: qdrant/qdrant:v1.7.4 + container_name: qdrant + ports: + - "6333:6333" + logging: + driver: "json-file" + options: + max-file: "1" + max-size: "10m" + deploy: + resources: + limits: + cpus: "8" + reservations: + cpus: "4" diff --git a/pyproject.toml b/pyproject.toml index 17bf012..8ed845c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "tqdm~=4.66", "httpx~=0.25", "psycopg[binary]~=3.1", + "qdrant-client~=1.7.1", ] [project.optional-dependencies] dev = [ diff --git a/vector_bench/client/__init__.py b/vector_bench/client/__init__.py index 8101650..31acedc 100644 --- a/vector_bench/client/__init__.py +++ b/vector_bench/client/__init__.py @@ -1,8 +1,10 @@ from vector_bench.client.pgvecto_rs import PgVectorsClient from vector_bench.client.pgvector import PgvectorClient +from vector_bench.client.qdrant import QdrantVectorClient from vector_bench.spec import EnumSelector class DataBaseClient(EnumSelector): PGVECTO_RS = PgVectorsClient PGVECTOR = PgvectorClient + QDRANT = QdrantVectorClient diff --git a/vector_bench/client/base.py b/vector_bench/client/base.py index d163f35..a4f668a 100644 --- a/vector_bench/client/base.py +++ b/vector_bench/client/base.py @@ -11,7 +11,7 @@ def insert_batch(self, records: list[Record]): pass @abc.abstractmethod - def query(self, vector: list[float], top_k: int = 10): + def query(self, vector: list[float], top_k: int = 10) -> list[Record]: pass @abc.abstractclassmethod diff --git a/vector_bench/client/qdrant.py b/vector_bench/client/qdrant.py new file mode 100644 index 0000000..893162a --- /dev/null +++ b/vector_bench/client/qdrant.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from qdrant_client import QdrantClient +from qdrant_client.models import Distance as QdrantDistance +from qdrant_client.models import PointStruct, ScoredPoint, VectorParams + +from vector_bench.client.base import BaseClient +from vector_bench.spec import DatabaseConfig, Distance, Record + +DISTANCE_TO_QDRANT = { + Distance.COSINE: QdrantDistance.COSINE, + Distance.EUCLIDEAN: QdrantDistance.EUCLID, + Distance.DOT_PRODUCT: QdrantDistance.DOT, +} + + +class QdrantVectorClient(BaseClient): + dim: int + url: str + table: str + distance: Distance + + @classmethod + def from_config(cls, config: DatabaseConfig) -> QdrantVectorClient: + cls.dim = config.vector_dim + cls.url = config.url + cls.table = f"{config.table}_qdrant" + cls.distance = config.distance + + cls = QdrantVectorClient() + cls.init_db() + return cls + + def init_db(self): + self.client = QdrantClient(url=self.url) + self.client.create_collection( + collection_name=self.table, + vectors_config=VectorParams( + size=self.dim, + distance=DISTANCE_TO_QDRANT[self.distance.__func__], + ), + ) + + def insert_batch(self, records: list[Record]): + self.client.upsert( + collection_name=self.table, + points=[ + PointStruct( + id=record.id, vector=record.vector.tolist(), payload=record.metadata + ) + for record in records + ], + ) + + def query(self, vector: list[float], top_k: int = 10) -> list[Record]: + points: list[ScoredPoint] = self.client.search( + collection_name=self.table, + query_vector=vector, + limit=top_k, + ) + return [ + Record(id=point.id, vector=point.vector, metadata=point.payload) + for point in points + ]