-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat:Add Knowledge Process Workflow (#2210)
- Loading branch information
Showing
23 changed files
with
7,218 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Chunk Manager Operator.""" | ||
from typing import List, Optional | ||
|
||
from dbgpt.core import Chunk | ||
from dbgpt.core.awel import MapOperator | ||
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata | ||
from dbgpt.rag import ChunkParameters | ||
from dbgpt.rag.chunk_manager import ChunkManager | ||
from dbgpt.rag.knowledge.base import Knowledge | ||
from dbgpt.util.i18n_utils import _ | ||
|
||
|
||
class ChunkManagerOperator(MapOperator[Knowledge, List[Chunk]]): | ||
"""Chunk Manager Operator.""" | ||
|
||
metadata = ViewMetadata( | ||
label=_("Chunk Manager Operator"), | ||
name="chunk_manager_operator", | ||
description=_(" Split Knowledge Documents into chunks."), | ||
category=OperatorCategory.RAG, | ||
parameters=[ | ||
Parameter.build_from( | ||
_("Chunk Split Parameters"), | ||
"chunk_parameters", | ||
ChunkParameters, | ||
description=_("Chunk Split Parameters."), | ||
optional=True, | ||
default=None, | ||
alias=["chunk_parameters"], | ||
), | ||
], | ||
inputs=[ | ||
IOField.build_from( | ||
_("Knowledge"), | ||
"knowledge", | ||
Knowledge, | ||
description=_("The knowledge to be loaded."), | ||
) | ||
], | ||
outputs=[ | ||
IOField.build_from( | ||
_("Chunks"), | ||
"chunks", | ||
List[Chunk], | ||
description=_("The split chunks by chunk manager."), | ||
is_list=True, | ||
) | ||
], | ||
) | ||
|
||
def __init__( | ||
self, | ||
chunk_parameters: Optional[ChunkParameters] = None, | ||
**kwargs, | ||
): | ||
"""Init the datasource operator.""" | ||
MapOperator.__init__(self, **kwargs) | ||
self._chunk_parameters = chunk_parameters or ChunkParameters( | ||
chunk_strategy="Automatic" | ||
) | ||
|
||
async def map(self, knowledge: Knowledge) -> List[Chunk]: | ||
"""Persist chunks in vector db.""" | ||
documents = knowledge.load() | ||
chunk_manager = ChunkManager( | ||
knowledge=knowledge, chunk_parameter=self._chunk_parameters | ||
) | ||
return chunk_manager.split(documents) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
"""Full Text Operator.""" | ||
import os | ||
from typing import List, Optional | ||
|
||
from dbgpt.core import Chunk | ||
from dbgpt.core.awel import MapOperator | ||
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata | ||
from dbgpt.storage.full_text.base import FullTextStoreBase | ||
from dbgpt.util.i18n_utils import _ | ||
|
||
|
||
class FullTextStorageOperator(MapOperator[List[Chunk], List[Chunk]]): | ||
"""Full Text Operator.""" | ||
|
||
metadata = ViewMetadata( | ||
label=_("Full Text Storage Operator"), | ||
name="full text_storage_operator", | ||
description=_("Persist embeddings into full text storage."), | ||
category=OperatorCategory.RAG, | ||
parameters=[ | ||
Parameter.build_from( | ||
_("Full Text Connector"), | ||
"full_text_store", | ||
FullTextStoreBase, | ||
description=_("The full text store."), | ||
alias=["full_text_store"], | ||
), | ||
], | ||
inputs=[ | ||
IOField.build_from( | ||
_("Chunks"), | ||
"chunks", | ||
List[Chunk], | ||
description=_("The text split chunks by chunk manager."), | ||
is_list=True, | ||
) | ||
], | ||
outputs=[ | ||
IOField.build_from( | ||
_("Chunks"), | ||
"chunks", | ||
List[Chunk], | ||
description=_( | ||
"The assembled chunks, it has been persisted to full text " "store." | ||
), | ||
is_list=True, | ||
) | ||
], | ||
) | ||
|
||
def __init__( | ||
self, | ||
full_text_store: Optional[FullTextStoreBase] = None, | ||
max_chunks_once_load: Optional[int] = None, | ||
**kwargs, | ||
): | ||
"""Init the datasource operator.""" | ||
MapOperator.__init__(self, **kwargs) | ||
self._full_text_store = full_text_store | ||
self._embeddings = full_text_store.get_config().embedding_fn | ||
self._max_chunks_once_load = max_chunks_once_load | ||
self.full_text_store = full_text_store | ||
|
||
async def map(self, chunks: List[Chunk]) -> List[Chunk]: | ||
"""Persist chunks in full text db.""" | ||
max_chunks_once_load = self._max_chunks_once_load or int( | ||
os.getenv("KNOWLEDGE_MAX_CHUNKS_ONCE_LOAD", 10) | ||
) | ||
full_text_ids = await self._full_text_store.aload_document_with_limit( | ||
chunks, max_chunks_once_load | ||
) | ||
for chunk, full_text_id in zip(chunks, full_text_ids): | ||
chunk.chunk_id = str(full_text_id) | ||
return chunks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
"""Knowledge Graph Operator.""" | ||
import os | ||
from typing import List, Optional | ||
|
||
from dbgpt.core import Chunk | ||
from dbgpt.core.awel import MapOperator | ||
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata | ||
from dbgpt.storage.knowledge_graph.base import KnowledgeGraphBase | ||
from dbgpt.util.i18n_utils import _ | ||
|
||
|
||
class KnowledgeGraphOperator(MapOperator[List[Chunk], List[Chunk]]): | ||
"""Knowledge Graph Operator.""" | ||
|
||
metadata = ViewMetadata( | ||
label=_("Knowledge Graph Operator"), | ||
name="knowledge_graph_operator", | ||
description=_("Extract Documents and persist into graph database."), | ||
category=OperatorCategory.RAG, | ||
parameters=[ | ||
Parameter.build_from( | ||
_("Knowledge Graph Connector"), | ||
"graph_store", | ||
KnowledgeGraphBase, | ||
description=_("The knowledge graph."), | ||
alias=["graph_store"], | ||
), | ||
], | ||
inputs=[ | ||
IOField.build_from( | ||
_("Chunks"), | ||
"chunks", | ||
List[Chunk], | ||
description=_("The text split chunks by chunk manager."), | ||
is_list=True, | ||
) | ||
], | ||
outputs=[ | ||
IOField.build_from( | ||
_("Chunks"), | ||
"chunks", | ||
List[Chunk], | ||
description=_( | ||
"The assembled chunks, it has been persisted to graph store." | ||
), | ||
is_list=True, | ||
) | ||
], | ||
) | ||
|
||
def __init__( | ||
self, | ||
graph_store: Optional[KnowledgeGraphBase] = None, | ||
max_chunks_once_load: Optional[int] = None, | ||
**kwargs, | ||
): | ||
"""Init the Knowledge Graph operator.""" | ||
MapOperator.__init__(self, **kwargs) | ||
self._graph_store = graph_store | ||
self._embeddings = graph_store.get_config().embedding_fn | ||
self._max_chunks_once_load = max_chunks_once_load | ||
self.graph_store = graph_store | ||
|
||
async def map(self, chunks: List[Chunk]) -> List[Chunk]: | ||
"""Persist chunks in graph db.""" | ||
max_chunks_once_load = self._max_chunks_once_load or int( | ||
os.getenv("KNOWLEDGE_MAX_CHUNKS_ONCE_LOAD", 10) | ||
) | ||
graph_ids = await self._graph_store.aload_document_with_limit( | ||
chunks, max_chunks_once_load | ||
) | ||
for chunk, graph_id in zip(chunks, graph_ids): | ||
chunk.chunk_id = str(graph_id) | ||
return chunks |
Oops, something went wrong.