From cd8c7759c0117b547863dc070cb5b2b1578946ab Mon Sep 17 00:00:00 2001
From: Abhishek Bhagwat
Date: Wed, 26 Jun 2024 14:00:08 +0000
Subject: [PATCH] feat: Add Vertex Vector Search Datapoints deletion

---
 .../vectorstores/_searcher.py                 | 27 +++++++++++++++-
 .../vectorstores/vectorstores.py              | 31 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/libs/vertexai/langchain_google_vertexai/vectorstores/_searcher.py b/libs/vertexai/langchain_google_vertexai/vectorstores/_searcher.py
index 618ef142..22fc9689 100644
--- a/libs/vertexai/langchain_google_vertexai/vectorstores/_searcher.py
+++ b/libs/vertexai/langchain_google_vertexai/vectorstores/_searcher.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, List, Tuple, Union
+from typing import Any, List, Sequence, Tuple, Union
 
 from google.cloud import storage  # type: ignore[attr-defined, unused-ignore]
 from google.cloud.aiplatform import telemetry
@@ -60,6 +60,11 @@ def add_to_index(
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def remove_datapoints(self, datapoint_ids: Sequence[str]) -> None:
+        """Remove datapoints from the index."""
+        raise NotImplementedError()
+
     def _postprocess_response(
         self, response: List[List[MatchNeighbor]]
     ) -> List[List[Tuple[str, float]]]:
@@ -136,6 +141,26 @@ def add_to_index(
             is_complete_overwrite=is_complete_overwrite,
         )
 
+    def remove_datapoints(self, datapoint_ids: Sequence[str]) -> None:
+        """Remove datapoints from the index.
+
+        Args:
+            datapoint_ids: Sequence[str]
+                Required. The list of datapoint ids to remove.
+        Raises:
+            ValueError: If the datapoint_ids sequence is empty.
+            RuntimeError: If there's an error while removing datapoints.
+        """
+        if not datapoint_ids:
+            raise ValueError("datapoint_ids must not be empty")
+
+        try:
+            self._index.remove_datapoints(datapoint_ids=datapoint_ids)
+        except Exception as e:
+            raise RuntimeError(
+                f"Error removing datapoints from the index: {e}"
+            ) from e
+
     def find_neighbors(
         self,
         embeddings: List[List[float]],
diff --git a/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py b/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py
index 33b6ed63..aac88432 100644
--- a/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py
+++ b/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py
@@ -192,6 +192,10 @@ def add_texts(
                 f"{len(metadatas)} != {len(texts)}"
             )
 
+        # Add generated IDs to metadata (each dict is updated in place)
+        for metadata, doc_id in zip(metadatas, ids):
+            metadata["doc_id"] = doc_id
+
         documents = [
             Document(page_content=text, metadata=metadata)
             for text, metadata in zip(texts, metadatas)
@@ -207,6 +211,33 @@ def add_texts(
 
         return ids
 
+    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
+        """
+        Delete by vector ID.
+        Args:
+            ids (Optional[List[str]]): List of ids to delete.
+            **kwargs (Any): Other keyword arguments (not used,
+                but included for compatibility).
+        Returns:
+            Optional[bool]: True if deletion is successful.
+        Raises:
+            ValueError: If ids is None or an empty list.
+            RuntimeError: If an error occurs during the deletion process.
+        """
+        if not ids:
+            raise ValueError("ids must be provided and cannot be an empty list")
+
+        # Index first: if that call raises, the document storage call is skipped
+        try:
+            # Remove datapoints from the Vertex Vector Search Index
+            self._searcher.remove_datapoints(datapoint_ids=ids)
+            # Remove documents from the document storage
+            self._document_storage.mdelete(ids)
+        except Exception as e:
+            raise RuntimeError(f"Error during deletion: {e}") from e
+
+        return True
+
     @classmethod
     def from_texts(
         cls: Type["_BaseVertexAIVectorStore"],