From 1a03c7886345b64d66ecf1337e313a81f3ff038c Mon Sep 17 00:00:00 2001
From: Paul Yu-Chun Chang
Date: Thu, 19 Sep 2024 10:45:01 +0000
Subject: [PATCH] feat: alternative rabbi names
---
VirtualHavruta/vh.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/VirtualHavruta/vh.py b/VirtualHavruta/vh.py
index affce3a..1e8cffb 100644
--- a/VirtualHavruta/vh.py
+++ b/VirtualHavruta/vh.py
@@ -1101,6 +1101,18 @@ def fetch_and_cache_topics():
topics = []
return topics
+ def preprocess_topic_names(extraction):
+     """Split comma-separated topic names; a "Rabbi X" entry also yields the bare "X"."""
+     updated_topic_names = []
+     for topic in extraction.split(","):
+         updated_name = topic.strip()
+         updated_topic_names.append(updated_name)
+         # Match the whole first word so "Rabbinic law" is not mangled into "ic law".
+         parts = updated_name.split(None, 1)
+         if len(parts) == 2 and parts[0].lower() == 'rabbi':
+             updated_topic_names.append(parts[1])
+     return updated_topic_names
+
def find_topic_slugs(topic_names, all_topics):
slugs = []
name_set = {name.lower() for name in topic_names}
@@ -1122,9 +1134,9 @@ def get_topic_descriptions(topic_slugs):
descriptions[slug] = topic_data['description']['en']
self.logger.info(f"MsgID={msgid}. [ONTOLOGY] Retrieved topic descriptions: {descriptions}")
return descriptions
-
+
# Process the extraction string
- topic_names = extraction.split(", ")
+ topic_names = preprocess_topic_names(extraction)
self.logger.info(f"MsgID={msgid}. [ONTOLOGY] Extracted topic names: {topic_names}")
# Get all topics