From 1a03c7886345b64d66ecf1337e313a81f3ff038c Mon Sep 17 00:00:00 2001
From: Paul Yu-Chun Chang
Date: Thu, 19 Sep 2024 10:45:01 +0000
Subject: [PATCH] feat: alternative rabbi names

---
 VirtualHavruta/vh.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/VirtualHavruta/vh.py b/VirtualHavruta/vh.py
index affce3a..1e8cffb 100644
--- a/VirtualHavruta/vh.py
+++ b/VirtualHavruta/vh.py
@@ -1101,6 +1101,30 @@ def fetch_and_cache_topics():
                 topics = []
             return topics
 
+        def preprocess_topic_names(extraction):
+            """Split a comma-separated extraction into topic names, adding aliases.
+
+            Each name is stripped of surrounding whitespace and blank entries
+            (empty input, stray commas) are dropped. A name that starts with the
+            title "Rabbi" followed by whitespace (case-insensitive) is followed
+            by the same name without the title, e.g. "Rabbi Akiva" -> "Akiva".
+            """
+            title = 'rabbi'
+            updated_topic_names = []
+            for topic in extraction.split(","):
+                updated_name = topic.strip()
+                if not updated_name:
+                    continue
+                updated_topic_names.append(updated_name)
+                head = updated_name[:len(title)]
+                rest = updated_name[len(title):]
+                # Require whitespace after the title so that words which merely
+                # begin with "rabbi" (e.g. "Rabbinic") are not truncated.
+                if head.lower() == title and rest[:1].isspace():
+                    # rest cannot be all whitespace: updated_name is already stripped.
+                    updated_topic_names.append(rest.strip())
+            return updated_topic_names
+
         def find_topic_slugs(topic_names, all_topics):
             slugs = []
             name_set = {name.lower() for name in topic_names}
@@ -1122,9 +1146,9 @@ def get_topic_descriptions(topic_slugs):
                     descriptions[slug] = topic_data['description']['en']
             self.logger.info(f"MsgID={msgid}. [ONTOLOGY] Retrieved topic descriptions: {descriptions}")
             return descriptions
-        
+
         # Process the extraction string
-        topic_names = extraction.split(", ")
+        topic_names = preprocess_topic_names(extraction)
         self.logger.info(f"MsgID={msgid}. [ONTOLOGY] Extracted topic names: {topic_names}")
 
         # Get all topics