Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement knowledge update functionality for bots #547

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 35 additions & 9 deletions client/app/factory/list/components/BotCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { RagTask } from '@/app/services/BotsController';
import CardGithubIcon from '@/public/icons/CardGithubIcon';
import CardHomeIcon from '@/public/icons/CardHomeIcon';
import CardCartIcon from '@/public/icons/CardCartIcon';
import { useKnowledgeUpdate } from '@/app/hooks/useKnowledgeUpdate';


declare type Bot = Tables<'bots'>;

Expand Down Expand Up @@ -79,6 +81,8 @@ const BotCard = (props: { bot: Bot }) => {
const { bot } = props;
const router = useRouter();
const { deleteBot, isLoading, isSuccess } = useBotDelete();
const { mutate: updateKnowledge, isPending: isUpdating } =
useKnowledgeUpdate();
const { data: taskInfo } = useGetBotRagTask(bot.repo_name!, false);

useEffect(() => {
Expand Down Expand Up @@ -115,6 +119,19 @@ const BotCard = (props: { bot: Bot }) => {
);
};

// Fires the knowledge-update mutation for the bot shown on this card.
const handleUpdateKnowledge = () => {
  // Request body: only the bot id is sent.
  const payload = { bot_id: bot.id };
  const callbacks = {
    onSuccess: () => {
      // TODO
    },
  };
  updateKnowledge(payload, callbacks);
};

return (
<>
<Card
Expand Down Expand Up @@ -188,21 +205,30 @@ const BotCard = (props: { bot: Bot }) => {
placement="top"
content={I18N.components.BotCard.gengXinZhiShiKu}
classNames={{
base: [
// arrow color
'before:bg-[#3F3F46] dark:before:bg-white',
],
base: ['before:bg-[#3F3F46] dark:before:bg-white'],
content: [
'py-2 px-4 rounded-lg shadow-xl text-white',
'bg-[#3F3F46]',
],
}}
>
<Image
src="../images/refresh.svg"
alt={I18N.components.BotCard.gengXinZhiShi}
className="z-10 cursor-pointer"
/>
{isUpdating ? (
<div className="w-6 h-6 flex items-center justify-center">
<span className="animate-spinner-ease-spin">
<LoadingIcon />
</span>
</div>
) : (
<Image
src="../images/refresh.svg"
onClick={(e) => {
e.stopPropagation();
handleUpdateKnowledge();
}}
alt={I18N.components.BotCard.gengXinZhiShi}
className="z-10 cursor-pointer"
/>
)}
</Tooltip>
</div>
</div>
Expand Down
9 changes: 9 additions & 0 deletions client/app/hooks/useKnowledgeUpdate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { useMutation } from '@tanstack/react-query';
import { updateKnowledge } from '../services/BotsController';

export function useKnowledgeUpdate() {
return useMutation({
mutationKey: ['updateKnowledge'],
mutationFn: updateKnowledge
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在完成更新后,可以将 useGetBotRagTask client 重刷一遍,更新下 taskInfo,这样可以让 BotCard 组件下的 任务状态 ICON 转起来

});
}
7 changes: 7 additions & 0 deletions client/app/services/BotsController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,10 @@ export async function bindBotToRepo(repsConfigs: BindBotToRepoConfig[]) {
});
return response.data;
}

/**
 * Knowledge update API.
 *
 * POSTs `config` to `/api/rag/update_knowledge` under `apiDomain`.
 *
 * @param config - Request body; `bot_id` identifies the bot to update.
 * @returns Whatever `axios.post` resolves with for that request.
 */
export async function updateKnowledge(config: { bot_id: string }) {
  // NOTE(review): the result of axios.post is returned as-is, not
  // `response.data` — confirm callers expect the full response object.
  const endpoint = `${apiDomain}/api/rag/update_knowledge`;
  return axios.post(endpoint, config);
}
2 changes: 1 addition & 1 deletion petercat_utils/rag_helper/git_doc_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
)

def handle_blob_node(self):
retrieval.add_knowledge_by_doc(
retrieval.check_and_update_knowledge(

Check warning on line 145 in petercat_utils/rag_helper/git_doc_task.py

View check run for this annotation

Codecov / codecov/patch

petercat_utils/rag_helper/git_doc_task.py#L145

Added line #L145 was not covered by tests
RAGGitDocConfig(
repo_name=self.repo_name,
file_path=self.path,
Expand Down
37 changes: 37 additions & 0 deletions petercat_utils/rag_helper/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,40 @@ def get_chunk_list(repo_name: str, page_size: int, page_number: int):
)
total_count = len(count_response.data)
return {"rows": query.data, "total": total_count}


def check_and_update_knowledge(config: RAGGitDocConfig):
# 初始化 GitHub loader 获取最新的文件信息
loader = init_github_file_loader(config)
latest_sha = loader.file_sha

# 获取当前存储的文档
client = get_client()
existing_docs = (
client.table(TABLE_NAME)
.select("id, file_sha")
.eq("repo_name", config.repo_name)
.eq("file_path", config.file_path)
.execute()
)
Comment on lines +220 to +226
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

我本地跑这里总超时, 不知道为啥, 还没能解决

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'canceling statement due to statement timeout'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see this issue #531


if not existing_docs.data:
# 如果不存在文档,直接添加
return add_knowledge_by_doc(config)

# 检查 SHA 是否变化
current_sha = existing_docs.data[0]["file_sha"]

if current_sha == latest_sha:
return False

# SHA 不同,需要更新
# 1. 删除旧文档
client.table(TABLE_NAME)\
.delete()\
.eq("repo_name", config.repo_name)\
.eq("file_path", config.file_path)\
.execute()

# 2. 添加新文档
return add_knowledge_by_doc(config)
43 changes: 42 additions & 1 deletion server/rag/router.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
from typing import Optional
from typing import Annotated, Optional
from pydantic import BaseModel

from auth.get_user_info import get_user_id
from fastapi import APIRouter, Depends
from petercat_utils.db.client.supabase import get_client

Expand Down Expand Up @@ -116,3 +118,42 @@ def get_rag_task(repo_name: str):
return response
except Exception as e:
return json.dumps({"success": False, "message": str(e)})

class UpdateKnowledgeRequest(BaseModel):
    """Request body accepted by the ``/rag/update_knowledge`` endpoint."""

    # Id of the bot to update; matched against the ``id`` column of ``bots``.
    bot_id: str

@router.post("/rag/update_knowledge", dependencies=[Depends(verify_rate_limit)])

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding authentication checks to ensure that only authorized users can update the knowledge of a bot. This will prevent unauthorized access and potential misuse of the update functionality.

def update_knowledge(
    request: UpdateKnowledgeRequest,
    user_id: Annotated[str | None, Depends(get_user_id)] = None,
):
    """Update the stored knowledge for a bot owned by the calling user.

    Looks up the ``bots`` row whose ``id`` equals ``request.bot_id`` and whose
    ``uid`` equals ``user_id``, builds a ``RAGGitDocConfig`` from that row and
    passes it to ``retrieval.check_and_update_knowledge``.

    Args:
        request: Body carrying the ``bot_id`` of the bot to update.
        user_id: Value provided by the ``get_user_id`` dependency; may be
            ``None``.

    Returns:
        A JSON-encoded string with ``success`` and ``message`` keys. Failures,
        including unexpected exceptions, are reported through this payload
        rather than raised.
    """
    # Without a user id the ownership filter below is meaningless, so refuse
    # the request before touching the database.
    if not user_id:
        return json.dumps({
            "success": False,
            "message": "User is not authenticated"
        })

    try:
        # Get config from database using bot_id
        supabase = get_client()
        # limit(1) instead of single(): single() raises when no row matches,
        # which would skip the explicit "not found" answer below and surface
        # a raw driver error instead.
        response = (
            supabase.table("bots")
            .select("*")
            .eq("id", request.bot_id)
            .eq("uid", user_id)
            .limit(1)
            .execute()
        )

        if not response.data:
            return json.dumps({
                "success": False,
                "message": f"Bot with id {request.bot_id} not found"
            })

        # NOTE(review): the whole bot row is expanded into RAGGitDocConfig;
        # confirm the row's columns match the fields that model accepts.
        bot_config = RAGGitDocConfig(**response.data[0])
        result = retrieval.check_and_update_knowledge(bot_config)

        if result:
            return json.dumps({
                "success": True,
                "message": "Knowledge updated successfully!"
            })
        return json.dumps({
            "success": False,
            "message": "Knowledge not updated!"
        })
    except Exception as e:
        return json.dumps({"success": False, "message": str(e)})
Loading