Skip to content

Commit

Permalink
feat(app): add daily log and zhipu model
Browse files Browse the repository at this point in the history
- add daily log
- add zhipu model
  • Loading branch information
MorvanZhou committed Sep 3, 2024
1 parent 42b79e2 commit 1bcd386
Show file tree
Hide file tree
Showing 28 changed files with 269 additions and 42 deletions.
4 changes: 4 additions & 0 deletions src/retk/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ class Settings(BaseSettings):
VOLCENGINE_API_KEY: str = Field(env='VOLCENGINE_API_KEY', default="")
VOLCENGINE_ENDPOINT_ID: str = Field(env='VOLCENGINE_ENDPOINT_ID', default="")

# zhipu bigmodel
BIGMODEL_API_KEY: str = Field(env='BIGMODEL_API_KEY', default="")
BIGMODEL_CONCURRENCY: int = Field(env='BIGMODEL_CONCURRENCY', default=1)

# Email client settings
RETHINK_EMAIL: str = Field(env='RETHINK_EMAIL', default="")
RETHINK_EMAIL_PASSWORD: str = Field(env='RETHINK_EMAIL_PASSWORD', default="")
Expand Down
1 change: 1 addition & 0 deletions src/retk/const/user_behavior_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class UserBehaviorTypeEnum(IntEnum):
NODE_FILE_UPLOAD = 23 # backend

LLM_KNOWLEDGE_RESPONSE = 24 # backend
NODE_PAGE_VIEW = 25 # backend


USER_BEHAVIOR_TYPE_MAP = {
Expand Down
5 changes: 5 additions & 0 deletions src/retk/controllers/node/node_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ async def get_node(
n, code = await core.node.get(au=au, nid=nid)
maybe_raise_json_exception(au=au, code=code)

await core.statistic.add_user_behavior(
uid=au.u.id,
type_=const.UserBehaviorTypeEnum.NODE_PAGE_VIEW,
remark=nid,
)
return schemas.node.NodeResponse(
requestId=au.request_id,
node=get_node_data(n),
Expand Down
7 changes: 6 additions & 1 deletion src/retk/core/ai/llm/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .aliyun import AliyunService, AliyunModelEnum
from .baidu import BaiduService, BaiduModelEnum
from .base import BaseLLMService
from .bigmodel import GLMService, GLMModelEnum
from .moonshot import MoonshotService, MoonshotModelEnum
from .openai import OpenaiService, OpenaiModelEnum
from .tencent import TencentService, TencentModelEnum
Expand Down Expand Up @@ -38,10 +39,14 @@
"service": VolcEngineService,
"models": VolcEngineModelEnum,
},
GLMService.name: {
"service": GLMService,
"models": GLMModelEnum,
},
}

TOP_P = 0.9
TEMPERATURE = 0.6
TEMPERATURE = 0.9
TIMEOUT = 60

LLM_DEFAULT_SERVICES: Dict[str, BaseLLMService] = {
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/aliyun.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class AliyunService(BaseLLMService):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class BaiduService(BaseLLMService):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
model_enum: Any,
endpoint: str,
top_p: float = 1.,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = None,
default_model: Optional[ModelConfig] = None,
):
Expand Down
97 changes: 97 additions & 0 deletions src/retk/core/ai/llm/api/bigmodel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import asyncio
from enum import Enum
from typing import Callable, Dict, List, Optional, Tuple, Union

from retk import config, const
from retk.core.utils import ratelimiter
from .base import ModelConfig, MessagesType
from .openai import OpenaiLLMStyle


# Provider rate-limit tiers per model:
# https://open.bigmodel.cn/dev/howuse/rate-limits/tiers?tab=0
class GLMModelEnum(Enum):
    """Zhipu BigModel (GLM) chat models available to GLMService.

    Each member wraps a ModelConfig holding the provider's model key and
    its maximum token count.
    """
    GLM4_PLUS = ModelConfig(
        key="GLM-4-Plus",
        max_tokens=128_000,
    )
    # Long-context variant with a 1,000,000-token limit.
    GLM4_LONG = ModelConfig(
        key="GLM-4-Long",
        max_tokens=1_000_000,
    )
    # Lightweight model; used as the service default (see GLMService.__init__).
    GLM4_FLASH = ModelConfig(
        key="GLM-4-Flash",
        max_tokens=128_000,
    )


class GLMService(OpenaiLLMStyle):
    """Zhipu BigModel (GLM) LLM service.

    Talks to the OpenAI-compatible chat-completions endpoint at
    open.bigmodel.cn and throttles batch requests with a concurrency
    limiter sized by the BIGMODEL_CONCURRENCY setting.
    """

    name = "glm"

    def __init__(
            self,
            top_p: float = 0.9,
            temperature: float = 0.9,
            timeout: float = 60.,
    ):
        super().__init__(
            model_enum=GLMModelEnum,
            endpoint="https://open.bigmodel.cn/api/paas/v4/chat/completions",
            default_model=GLMModelEnum.GLM4_FLASH.value,
            top_p=top_p,
            temperature=temperature,
            timeout=timeout,
        )

    @classmethod
    def set_api_auth(cls, auth: Dict[str, str]):
        """Store the BigModel API key from an auth mapping (key "API-KEY")."""
        config.get_settings().BIGMODEL_API_KEY = auth.get("API-KEY", "")

    @staticmethod
    def get_api_key() -> str:
        """Return the configured BigModel API key ("" when unset)."""
        return config.get_settings().BIGMODEL_API_KEY

    @staticmethod
    async def _batch_complete_union(
            messages: List[MessagesType],
            func: Callable,
            model: Optional[str] = None,  # PEP 484: explicit Optional instead of implicit `str = None`
            req_id: Optional[str] = None,
    ) -> List[Tuple[Union[str, Dict[str, str]], const.CodeEnum]]:
        """Run *func* over every message list concurrently.

        A single ConcurrentLimiter is shared by all tasks so at most
        BIGMODEL_CONCURRENCY requests are in flight at once. Results are
        returned in the same order as *messages*.
        """
        settings = config.get_settings()
        concurrent_limiter = ratelimiter.ConcurrentLimiter(n=settings.BIGMODEL_CONCURRENCY)

        tasks = [
            func(
                limiters=[concurrent_limiter],
                messages=m,
                model=model,
                req_id=req_id,
            ) for m in messages
        ]
        return await asyncio.gather(*tasks)

    async def batch_complete(
            self,
            messages: List[MessagesType],
            model: Optional[str] = None,
            req_id: Optional[str] = None,
    ) -> List[Tuple[str, const.CodeEnum]]:
        """Complete each message list; return (text, code) pairs in order."""
        return await self._batch_complete_union(
            messages=messages,
            func=self._batch_complete,
            model=model,
            req_id=req_id,
        )

    async def batch_complete_json_detect(
            self,
            messages: List[MessagesType],
            model: Optional[str] = None,
            req_id: Optional[str] = None,
    ) -> List[Tuple[Dict[str, str], const.CodeEnum]]:
        """Complete each message list and parse a JSON object per response."""
        return await self._batch_complete_union(
            messages=messages,
            func=self._batch_stream_complete_json_detect,
            model=model,
            req_id=req_id,
        )
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/moonshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class MoonshotService(OpenaiLLMStyle):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.3,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
4 changes: 2 additions & 2 deletions src/retk/core/ai/llm/api/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(
endpoint: str,
default_model: ModelConfig,
top_p: float = 0.9,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down Expand Up @@ -148,7 +148,7 @@ class OpenaiService(OpenaiLLMStyle):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.7,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/tencent.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class TencentService(BaseLLMService):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/volcengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class VolcEngineService(OpenaiLLMStyle):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.3,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/ai/llm/api/xfyun.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class XfYunService(BaseLLMService):
def __init__(
self,
top_p: float = 0.9,
temperature: float = 0.4,
temperature: float = 0.9,
timeout: float = 60.,
):
super().__init__(
Expand Down
7 changes: 7 additions & 0 deletions src/retk/core/scheduler/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ def init_tasks():
kwargs={"delta_days": 30},
hour=1,
)

if not config.is_local_db():
run_every_at(
job_id="auto_daily_report",
func=tasks.auto_daily_report.auto_daily_report,
hour=0,
)
return


Expand Down
1 change: 1 addition & 0 deletions src/retk/core/scheduler/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
notice,
extend_node,
auto_clean_trash,
auto_daily_report,
)
93 changes: 93 additions & 0 deletions src/retk/core/scheduler/tasks/auto_daily_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import asyncio
import json
import os
import time
from datetime import datetime, timedelta

from bson.objectid import ObjectId

from retk import config, const
from retk.core.statistic import __write_line_date, __manage_files
from retk.models.client import init_mongo
from retk.models.coll import CollNameEnum

try:
import aiofiles
except ImportError:
aiofiles = None


def auto_daily_report():
    """Synchronous scheduler entry point for the async daily-report job.

    The scheduler invokes this from a plain (loop-less) thread.
    asyncio.run creates a fresh event loop, runs _auto_daily_report to
    completion, and reliably closes the loop again — unlike the manual
    new_event_loop/run_until_complete/close sequence it replaces, which
    left a closed loop installed as the thread's current event loop.
    """
    return asyncio.run(_auto_daily_report())


async def _auto_daily_report():
    """Append one JSON line of daily platform statistics to the report log.

    No-op when running against a local DB or when aiofiles is unavailable.
    Reads the date of the last record from the log tail; if a report for
    yesterday (or later) already exists, returns without writing.
    Otherwise counts users/nodes/files in MongoDB and appends a record
    dated today.
    """
    if config.is_local_db() or aiofiles is None:
        return
    file = const.settings.ANALYTICS_DIR / "daily_report" / "report.log"
    lock = asyncio.Lock()
    now = datetime.now()
    # Fetch only the last line: seek backwards from EOF to the previous
    # newline instead of reading the whole file.
    if file.exists():
        async with aiofiles.open(file, "r", encoding="utf-8") as f:
            async with lock:
                try:
                    await f.seek(-2, os.SEEK_END)
                    # BUG FIX: with aiofiles, read() is a coroutine and must be
                    # awaited. The un-awaited original compared a coroutine
                    # object to "\n" (always unequal), so the scan always ran
                    # past the start of the file into the OSError fallback and
                    # last_line was always "".
                    while await f.read(1) != "\n":
                        await f.seek(-2, os.SEEK_CUR)
                    last_line = await f.readline()
                except OSError:
                    # File has fewer than 2 bytes, or contains a single line.
                    last_line = ""
        try:
            last_record = json.loads(last_line)
        except json.JSONDecodeError:
            last_record = {}
    else:
        last_record = {}
    yesterday = (now - timedelta(days=1)).date()
    last_record_date = last_record.get("date", None)

    if last_record_date is not None:
        last_record_date = datetime.strptime(last_record_date, '%Y-%m-%d').date()
    else:
        # No usable previous record: assume the day before yesterday so a
        # fresh report is produced on this run.
        last_record_date = yesterday - timedelta(days=1)

    # A report covering yesterday (or later) already exists; nothing to do.
    if last_record_date >= yesterday:
        return

    await __manage_files(now, file, lock)

    _, db = init_mongo(connection_timeout=5)
    total_email_users = await db[CollNameEnum.users.value].count_documents(
        {"source": const.UserSourceEnum.EMAIL.value}
    )
    total_google_users = await db[CollNameEnum.users.value].count_documents(
        {"source": const.UserSourceEnum.GOOGLE.value}
    )
    total_github_users = await db[CollNameEnum.users.value].count_documents(
        {"source": const.UserSourceEnum.GITHUB.value}
    )
    total_users = total_email_users + total_google_users + total_github_users
    # Turn the last report date into an ObjectId lower bound so that "new"
    # documents are those created after that date.
    timestamp = time.mktime(last_record_date.timetuple())
    time_filter = ObjectId.from_datetime(datetime.utcfromtimestamp(timestamp))

    await __write_line_date(
        data={
            "date": now.strftime('%Y-%m-%d'),
            "totalUsers": total_users,
            "totalEmailUsers": total_email_users,
            "totalGoogleUsers": total_google_users,
            "totalGithubUsers": total_github_users,
            "newUsers": await db[CollNameEnum.users.value].count_documents({"_id": {"$gt": time_filter}}),
            "totalNodes": await db[CollNameEnum.nodes.value].count_documents({}),
            "newNodes": await db[CollNameEnum.nodes.value].count_documents({"_id": {"$gt": time_filter}}),
            "totalFiles": await db[CollNameEnum.user_file.value].count_documents({}),
        },
        path=file,
        lock=lock
    )
Loading

0 comments on commit 1bcd386

Please sign in to comment.