Skip to content

Commit

Permalink
feat(py): expose python caching utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdoret committed Oct 21, 2024
1 parent 6131ad5 commit f3cd9b1
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
1 change: 1 addition & 0 deletions pyfuzon/python/pyfuzon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
__all__ = pyfuzon.__all__

from .matcher import TermMatcher
from .cache import get_cache_key, get_cache_path
28 changes: 28 additions & 0 deletions pyfuzon/python/pyfuzon/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Caching utilities for pyfuzon.
This module provides functions to help manage the cache of TermMatchers.
Cache keys are built by fuzon using the sorted source paths. For each path,
a stamp is computed as follows (missing values are replaced by empty strings):
+ file path: {path}-{size}-{last-modified-datetime}
+ url: {url}-{etag-checksum}-{last-modified-datetime}
All stamps are then concatenated, and a hash of the result is used as the cache key.
Cache paths adhere to the following specifications:
+ Linux: XDG base / user directory
+ Windows: Known folder API
+ macOS: Standard Directories guidelines
For more information, see: https://github.com/dirs-dev/dirs-rs
"""

from pathlib import Path

from .pyfuzon import get_cache_key as _get_cache_key
from .pyfuzon import get_cache_path as _get_cache_path

def get_cache_key(sources: list[str]) -> str:
    """Compute the deterministic cache key for a collection of source paths.

    Args:
        sources: Source paths to derive the key from.

    Returns:
        The cache key produced by the underlying fuzon binding.
    """
    cache_key = _get_cache_key(sources)
    return cache_key

def get_cache_path(sources: list[str]) -> Path:
    """Resolve the full platform-specific cache path for a collection of sources.

    Args:
        sources: Source paths to derive the cache location from.

    Returns:
        The value returned by the underlying fuzon binding, wrapped in a
        ``pathlib.Path``.
    """
    raw_path = _get_cache_path(sources)
    return Path(raw_path)
6 changes: 3 additions & 3 deletions pyfuzon/python/pyfuzon/matcher.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from typing import Self

from dataclasses import dataclass

from pyfuzon import Term, score_terms, parse_files, load_terms, dump_terms
Expand All @@ -10,7 +11,7 @@ class TermMatcher:
"""Fuzzy matches terms from RDF terminologies to input queries."""

terms: list[Term]

def top(self, query: str, n: int=5) -> list[Term]:
    """Return the first ``n`` entries of the ranking for ``query``.

    Args:
        query: Input text to compare the terms against.
        n: Number of leading results to keep from the ranking.

    Returns:
        The first ``n`` items of ``self.rank(query)``.
    """
    ranked_terms = self.rank(query)
    return ranked_terms[:n]
Expand All @@ -19,7 +20,7 @@ def rank(self, query: str) -> list[Term]:
"""Return all terms, ranked by query similarity."""
scores = self.score(query)
ranks = [
i[0] for i in
i[0] for i in
sorted(enumerate(scores), key=lambda x:x[1], reverse=True)
]
return [self.terms[rank] for rank in ranks]
Expand All @@ -45,4 +46,3 @@ def load(cls, path):
def dump(self, path):
    """Serialize this matcher's terms to disk at ``path``.

    Delegates to ``dump_terms`` with ``self.terms``.
    """
    terms = self.terms
    dump_terms(terms, path)

0 comments on commit f3cd9b1

Please sign in to comment.