Skip to content

Commit

Permalink
add idf
Browse files Browse the repository at this point in the history
  • Loading branch information
MorvanZhou committed Nov 14, 2023
1 parent 00bb578 commit 871f037
Show file tree
Hide file tree
Showing 17 changed files with 905,242 additions and 185 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
/.tox
/**/.data
/tests/tmp
/src/rethink/.data
/**/*.egg-info
/dist
**/__pycache__
/src/rethink/models/search/data/*.csv
/src/rethink/models/search/data/words.txt


# local env files
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ httpx>=0.25.0
captcha>=0.5.0
python-multipart>=0.0.6
cos-python-sdk-v5>=1.9.26
numpy>=1.25.2
scikit-learn>=1.3.2
jieba>=0.42.1
requests>=2.31.0
starlette>=0.27.0
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ install_requires =
captcha>=0.5.0
python-multipart>=0.0.6
cos-python-sdk-v5>=1.9.26
numpy>=1.25.2
scikit-learn>=1.3.2
jieba>=0.42.1
requests>=2.31.0
starlette>=0.27.0

[options.packages.find]
where = src
Expand Down
72 changes: 53 additions & 19 deletions src/rethink/models/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
from bson.tz_util import utc
from mongita import MongitaClientDisk
from mongita.collection import Collection
from pymongo import MongoClient
from pymongo import MongoClient, TEXT
from pymongo.collection import Collection as RemoteCollection

from rethink import config, const
from rethink.logger import logger
from . import utils
from .tps import UserMeta, UserNodeIds, Node
from .tps import UserMeta, Node


@dataclass
class Collections:
users: Union[Collection, RemoteCollection] = None
unids: Union[Collection, RemoteCollection] = None
nodes: Union[Collection, RemoteCollection] = None
import_data: Union[Collection, RemoteCollection] = None
user_file: Union[Collection, RemoteCollection] = None
Expand Down Expand Up @@ -47,11 +46,57 @@ def set_client():
)


def try_build_index():
    """Ensure the MongoDB indexes used by the app exist.

    Only runs against a real remote ``MongoClient``; the local mongita disk
    client is skipped entirely.  Each index is guarded by a lookup in
    ``index_information()`` so repeated startups do not re-issue
    ``create_index`` calls.

    NOTE(review): every guard key must equal the *default index name* pymongo
    generates from the key spec (``"_"``-joined ``field_direction`` pairs,
    with ``text`` as the direction for TEXT keys); a mismatched guard makes
    the check always true and the index gets re-requested on every startup.
    """
    if not isinstance(CLIENT, MongoClient):
        # mongita (local disk) client: no explicit index management here.
        return

    users_info = COLL.users.index_information()
    if "id_1" not in users_info:
        COLL.users.create_index("id", unique=True)
    if "account_1_source_1" not in users_info:
        COLL.users.create_index(["account", "source"], unique=True)

    nodes_info = COLL.nodes.index_information()
    if "id_1" not in nodes_info:
        COLL.nodes.create_index("id", unique=True)
    if "uid_1_id_-1_modifiedAt_-1" not in nodes_info:
        # sort nodes of a user by modification time
        COLL.nodes.create_index(
            [("uid", 1), ("id", -1), ("modifiedAt", -1)],
            unique=True,
        )
    if "uid_1_id_-1_inTrash_-1" not in nodes_info:
        # filter a user's nodes by trash state
        COLL.nodes.create_index(
            [("uid", 1), ("id", -1), ("inTrash", -1)],
            unique=True,
        )
    if "uid_1_id_-1" not in nodes_info:
        # presumably used for creation-time ordering ("created at" in the
        # original) -- TODO confirm against the query sites
        COLL.nodes.create_index(
            [("uid", 1), ("id", -1)],
            unique=True,
        )
    if "uid_1_id_-1_title_1" not in nodes_info:
        COLL.nodes.create_index(
            [("uid", 1), ("id", -1), ("title", 1)],
            unique=True,
        )
    # BUG FIX: the original guard was "uid_1_id_-1_searchKeys_1", which can
    # never appear in index_information() for this key spec; pymongo names a
    # [("uid", 1), ("searchKeys", TEXT), ("md", TEXT)] index
    # "uid_1_searchKeys_text_md_text", so the text index was re-requested on
    # every startup.
    if "uid_1_searchKeys_text_md_text" not in nodes_info:
        COLL.nodes.create_index(
            [("uid", 1), ("searchKeys", TEXT), ("md", TEXT)],
        )

    import_data_info = COLL.import_data.index_information()
    if "uid_1" not in import_data_info:
        COLL.import_data.create_index("uid", unique=True)

    user_file_info = COLL.user_file.index_information()
    if "uid_1_fid_-1" not in user_file_info:
        COLL.user_file.create_index([("uid", 1), ("fid", -1)], unique=True)


def init():
set_client()
db = CLIENT[config.get_settings().DB_NAME]
COLL.users = db["users"]
COLL.unids = db["unids"]
COLL.nodes = db["nodes"]
COLL.import_data = db["importData"]
COLL.user_file = db["userFile"]
Expand All @@ -71,14 +116,7 @@ def init():
{"$set": {"startAt": doc["startAt"].replace(tzinfo=utc)}},
)

if isinstance(CLIENT, MongoClient):
# try creating index
COLL.users.create_index("id", unique=True)
COLL.users.create_index(["account", "source"], unique=True)
COLL.nodes.create_index("id", unique=True)
COLL.unids.create_index("id", unique=True)
COLL.import_data.create_index("uid", unique=True)
COLL.user_file.create_index(["uid", "fid"], unique=True)
try_build_index()

# try add default user
if config.get_settings().ONE_USER:
Expand All @@ -89,13 +127,15 @@ def init():

logger.info("running at the first time, a user with initial data will be created")
language = os.getenv("VUE_APP_LANGUAGE", const.Language.EN.value)
uid = utils.short_uuid()
ns = const.NEW_USER_DEFAULT_NODES[language]

def create_node(md: str):
title_, snippet_ = utils.preprocess_md(md)
n: Node = {
"_id": ObjectId(),
"id": utils.short_uuid(),
"uid": uid,
"title": title_,
"snippet": snippet_,
"md": md,
Expand All @@ -119,7 +159,7 @@ def create_node(md: str):

u: UserMeta = {
"_id": ObjectId(),
"id": utils.short_uuid(),
"id": uid,
"source": const.UserSource.LOCAL.value,
"account": const.DEFAULT_USER["email"],
"email": const.DEFAULT_USER["email"],
Expand All @@ -139,12 +179,6 @@ def create_node(md: str):
}
}
_ = COLL.users.insert_one(u)
unids: UserNodeIds = {
"_id": u["_id"],
"id": u["id"],
"nodeIds": [n0["id"], n1["id"]],
}
_ = COLL.unids.insert_one(unids)


def get_client():
Expand Down
54 changes: 4 additions & 50 deletions src/rethink/models/db_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

def remove_from_node(from_nid: str, to_nid: str):
if config.is_local_db():
# no $pull support
to_n = COLL.nodes.find_one({"id": to_nid})
if to_n is None:
return
Expand All @@ -27,29 +28,10 @@ def remove_from_node(from_nid: str, to_nid: str):
)


# def remove_to_node(from_nid: str, to_nid: str):
# if config.is_local_db():
# from_n = COLL.nodes.find_one({"id": from_nid})
# if from_n is None:
# return
# try:
# from_n["toNodeIds"].remove(to_nid)
# COLL.nodes.update_one(
# {"id": from_nid},
# {"$set": {"toNodeIds": from_n["toNodeIds"]}}
# )
# except ValueError:
# pass
# else:
# COLL.nodes.update_one(
# {"id": from_nid},
# {"$pull": {"toNodeIds": to_nid}}
# )


def node_add_to_set(id_: str, key: str, value: Any) -> UpdateResult:
res = UpdateResult(0, 0)
if config.is_local_db():
# no $addToSet support
has_new = False
doc = COLL.nodes.find_one({"id": id_})
if doc is None:
Expand All @@ -73,38 +55,10 @@ def node_add_to_set(id_: str, key: str, value: Any) -> UpdateResult:


def nodes_get(
uid: str,
ids: List[str],
assert_conditions: Dict[str, Any]
) -> List[Node]:
c = {"id": {"$in": ids}}
if not config.is_local_db():
c.update(assert_conditions)

c = {"id": {"$in": ids}, "uid": uid, **assert_conditions}
docs = list(COLL.nodes.find(c))
if config.is_local_db():
for k, c in assert_conditions.items():
for i in range(len(docs) - 1, -1, -1):
if docs[i][k] != c:
docs.pop(i)
return docs


def remove_nids(uid: str, nids: List[str]) -> UpdateResult:
res = UpdateResult(0, 0)
if config.is_local_db():
doc = COLL.unids.find_one({"id": uid})
if doc is None:
return res
for nid in nids:
if nid in doc["nodeIds"]:
doc["nodeIds"].remove(nid)
res = COLL.unids.update_one(
{"id": uid},
{"$set": {"nodeIds": doc["nodeIds"]}}
)
else:
res = COLL.unids.update_one(
{"id": uid},
{"$pull": {"nodeIds": {"$in": nids}}}
)
return res
19 changes: 16 additions & 3 deletions src/rethink/models/files/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@ def update_process(
if code is not None:
data["code"] = code
if is_local_db():
# local db not support find_one_and_update
COLL.import_data.update_one({"uid": uid}, {"$set": data})
doc = COLL.import_data.find_one({"uid": uid})
else:
doc = COLL.import_data.find_one_and_update({"uid": uid}, {
"$set": data})
doc = COLL.import_data.find_one_and_update(
{"uid": uid},
{"$set": data}
)
if doc is None:
return doc, const.Code.OPERATION_FAILED
return doc, const.Code.OK
Expand Down Expand Up @@ -155,7 +158,17 @@ def upload_obsidian_thread(
md=md,
refresh_on_same_md=True,
)
if code != const.Code.OK:
if code == const.Code.NODE_NOT_EXIST:
n, code = models.node.add(
uid=uid,
md=md,
type_=const.NodeType.MARKDOWN.value,
)
if code != const.Code.OK:
__set_running_false(uid, code, [filepath])
logger.info(f"error: {code}, filepath: {filepath}, uid: {uid}")
return
elif code != const.Code.OK:
__set_running_false(uid, code, [filepath])
logger.info(f"error: {code}, filepath: {filepath}, uid: {uid}")
return
Expand Down
52 changes: 18 additions & 34 deletions src/rethink/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def add(
data: tps.Node = {
"_id": _id,
"id": nid,
"uid": uid,
"title": title,
"snippet": snippet,
"md": md,
Expand All @@ -101,12 +102,6 @@ def add(
res = COLL.nodes.insert_one(data)
if not res.acknowledged:
return None, const.Code.OPERATION_FAILED
res = COLL.unids.update_one(
{"id": uid},
{"$push": {"nodeIds": nid}}
)
if res.modified_count != 1:
return None, const.Code.OPERATION_FAILED

user.update_used_space(uid=uid, delta=new_size)

Expand Down Expand Up @@ -151,15 +146,10 @@ def get_batch(
with_disabled: bool = False,
in_trash: bool = False,
) -> Tuple[List[tps.Node], const.Code]:
unids, code = user.get_node_ids(uid=uid)
if code != const.Code.OK:
return [], code
for nid in nids:
if nid not in unids:
return [], const.Code.NODE_NOT_EXIST
assert_conditions = {} if with_disabled else {"disabled": False}
assert_conditions.update({"inTrash": in_trash})
docs = db_ops.nodes_get(ids=nids, assert_conditions=assert_conditions)
c = {"id": {"$in": nids}, "uid": uid, "inTrash": in_trash}
if not with_disabled:
c["disabled"] = False
docs = list(COLL.nodes.find(c))
if len(docs) != len(nids):
return [], const.Code.NODE_NOT_EXIST

Expand Down Expand Up @@ -193,6 +183,9 @@ def update(
return n, const.Code.OK

old_md_size = len(n["md"].encode("utf-8"))
new_data = {
"modifiedAt": datetime.datetime.now(tz=utc),
}

if n["title"] != title:
# update it's title in fromNodes md's link
Expand All @@ -202,14 +195,14 @@ def update(
n, code = update(uid=uid, nid=from_node["id"], md=new_md)
if code != const.Code.OK:
logger.info(f"update fromNode {from_node['id']} failed")
new_data["title"] = title
new_data["searchKeys"] = utils.txt2search_keys(title)

if n["md"] != md:
new_data["md"] = md
if n["snippet"] != snippet:
new_data["snippet"] = snippet

new_data = {
"title": title,
"searchKeys": utils.txt2search_keys(title),
"md": md,
"snippet": snippet,
"modifiedAt": datetime.datetime.now(tz=utc),
}
new_data["toNodeIds"], code = __flush_to_node_ids(
nid=n["id"], orig_to_nid=n["toNodeIds"], new_md=md)
if code != const.Code.OK:
Expand All @@ -222,6 +215,7 @@ def update(
return_document=True, # return updated doc
)
else:
# local db not support find_one_and_update
res = COLL.nodes.update_one(
{"id": nid},
{"$set": new_data}
Expand Down Expand Up @@ -312,12 +306,8 @@ def batch_to_trash(uid: str, nids: List[str]) -> const.Code:


def get_nodes_in_trash(uid: str, page: int, page_size: int) -> Tuple[List[tps.Node], int]:
unids, code = user.get_node_ids(uid=uid)
if code != const.Code.OK:
return [], 0

condition = {
"id": {"$in": unids},
"uid": uid,
"disabled": False,
"inTrash": True,
}
Expand Down Expand Up @@ -374,17 +364,11 @@ def batch_delete(uid: str, nids: List[str]) -> const.Code:
user.update_used_space(uid=uid, delta=used_space_delta)

# remove node
res = COLL.nodes.delete_many({"id": {"$in": nids}})
res = COLL.nodes.delete_many({"id": {"$in": nids}, "uid": uid})
if res.deleted_count != len(nids):
logger.error(f"delete nodes {nids} failed")
return const.Code.OPERATION_FAILED

# update user nodeIds
res = db_ops.remove_nids(uid, nids)
if res.matched_count != 1:
logger.error(f"update user {uid} failed")
return const.Code.OPERATION_FAILED

return const.Code.OK


Expand Down
Loading

0 comments on commit 871f037

Please sign in to comment.