From 6939cc16ea254d2ce3f4a6df4b5d41fa0d06714d Mon Sep 17 00:00:00 2001 From: ivanzhu <53419016+ivanzhu109@users.noreply.github.com> Date: Thu, 9 May 2024 15:24:33 +0800 Subject: [PATCH 1/2] fix(rag): Fix CrossEncoderRanker bug of EmbeddingRetriever --- dbgpt/rag/retriever/rerank.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbgpt/rag/retriever/rerank.py b/dbgpt/rag/retriever/rerank.py index 69240c4b9..fde7cf550 100644 --- a/dbgpt/rag/retriever/rerank.py +++ b/dbgpt/rag/retriever/rerank.py @@ -219,7 +219,7 @@ def rank( rank_scores = self._model.predict(sentences=query_content_pairs) for candidate, score in zip(candidates_with_scores, rank_scores): - candidate.score = score + candidate.score = float(score) new_candidates_with_scores = sorted( candidates_with_scores, key=lambda x: x.score, reverse=True From 1e57e8a8f3c187a4df257e32ba18939525cfc94b Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Fri, 10 May 2024 00:20:09 +0800 Subject: [PATCH 2/2] style:mypy fmt --- dbgpt/storage/vector_store/chroma_store.py | 2 +- dbgpt/storage/vector_store/pgvector_store.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbgpt/storage/vector_store/chroma_store.py b/dbgpt/storage/vector_store/chroma_store.py index 16e282dd8..094f3e6d9 100644 --- a/dbgpt/storage/vector_store/chroma_store.py +++ b/dbgpt/storage/vector_store/chroma_store.py @@ -82,7 +82,7 @@ def __init__(self, vector_store_config: ChromaVectorConfig) -> None: # client_settings=chroma_settings, client=client, collection_metadata=collection_metadata, - ) + ) # type: ignore def similar_search( self, text, topk, filters: Optional[MetadataFilters] = None diff --git a/dbgpt/storage/vector_store/pgvector_store.py b/dbgpt/storage/vector_store/pgvector_store.py index 02ab4e1ec..cb9728e49 100644 --- a/dbgpt/storage/vector_store/pgvector_store.py +++ b/dbgpt/storage/vector_store/pgvector_store.py @@ -93,7 +93,8 @@ def load_document(self, chunks: List[Chunk]) -> 
List[str]: List[str]: chunk ids. """ lc_documents = [Chunk.chunk2langchain(chunk) for chunk in chunks] - return self.vector_store_client.from_documents(lc_documents) + self.vector_store_client.from_documents(lc_documents) + return [str(chunk.chunk_id) for chunk in chunks] def delete_vector_name(self, vector_name: str): """Delete vector by name. @@ -109,4 +110,5 @@ def delete_by_ids(self, ids: str): Args: ids(str): vector ids, separated by comma. """ - return self.vector_store_client.delete(ids) + delete_ids = ids.split(",") + return self.vector_store_client.delete(delete_ids)