Skip to content

Commit

Permalink
Merge pull request #21 from Sefaria/feat/expand-topics
Browse files Browse the repository at this point in the history
feat: expand topics
  • Loading branch information
Paul-Yu-Chun-Chang authored Aug 13, 2024
2 parents f77fc98 + 6c4adde commit 94ed3e0
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
41 changes: 40 additions & 1 deletion VirtualHavruta/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,43 @@ def construct_db_filter(matched_filters: dict) -> dict:
elif filter_conditions:
return filter_conditions[0] # Return the single condition without $or
else:
return {} # Return an empty dict if no conditions are provided
return {} # Return an empty dict if no conditions are provided

def merge_topics(extraction: str = '', matched_topics: "dict | None" = None) -> str:
    """
    Merges a comma-separated string of topics with an existing dictionary of lists of topics,
    avoiding duplicates.

    Topics from matched_topics come first (in dictionary/list order), followed by any
    topics from extraction that have not been seen yet. Duplicates are detected
    case-insensitively; the first spelling encountered is the one that is kept.

    Parameters:
    - extraction (str): A string of topics separated by commas (e.g., 'topic1, topic2, topic3').
      Whitespace around each topic is ignored and empty entries are dropped.
    - matched_topics (dict): A dictionary with lists of topics that have already been matched.
      None is treated as an empty dictionary.

    Returns:
    - str: A string of combined topics, separated by ', ' with no duplicates.
    """
    merged = []
    seen = set()  # lowercased topics already emitted, for O(1) membership tests

    def _add(topic):
        # Append topic unless a case-insensitive equivalent was already added.
        key = topic.lower()
        if key not in seen:
            seen.add(key)
            merged.append(topic)

    # Previously matched topics take precedence over newly extracted ones.
    for topics in (matched_topics or {}).values():
        for topic in topics:
            _add(topic)

    # Split on bare commas and strip, so 'a,b' and 'a, b' behave the same, and
    # skip blanks so that an empty extraction (''.split(',') == ['']) or a
    # trailing comma does not inject an empty topic into the result.
    for raw_topic in (extraction or '').split(','):
        topic = raw_topic.strip()
        if topic:
            _add(topic)

    return ', '.join(merged)
3 changes: 2 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ database:
username: user
password: password@dev
top_k: 15
metadata_fields: ['metadata_field_name1', 'metadata_field_name1']
metadata_fields: ['metadata_field_name1', 'metadata_field_name2']
topic_fields: ['topic_field_name1', 'topic_field_name2']
kg:
url: bolt://publicip_kg:7687
username: user
Expand Down

0 comments on commit 94ed3e0

Please sign in to comment.