Skip to content

Commit

Permalink
Merge branch 'main' of github.com:SpencerPresley/COSC425-DATA
Browse files Browse the repository at this point in the history
  • Loading branch information
SpencerPresley committed Dec 17, 2024
2 parents 5a3382f + f3124dc commit f5c57fd
Showing 1 changed file with 38 additions and 36 deletions.
74 changes: 38 additions & 36 deletions src/academic_metrics/DB/DatabaseSetup.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,20 +184,6 @@ def update_category(
existing_data["citation_average"] = new_average
self.logger.debug(f"Updated citation average to: {new_average}")

# Update numeric counts
existing_data["faculty_count"] = existing_data.get(
"faculty_count", 0
) + new_data.get("faculty_count", 0)
existing_data["department_count"] = existing_data.get(
"department_count", 0
) + new_data.get("department_count", 0)
existing_data["article_count"] = existing_data.get(
"article_count", 0
) + new_data.get("article_count", 0)
existing_data["tc_count"] = existing_data.get("tc_count", 0) + new_data.get(
"tc_count", 0
)

# Update lists using set operations with None protection
existing_data["doi_list"] = list(set(existing_dois).union(new_dois))

Expand Down Expand Up @@ -225,6 +211,14 @@ def update_category(
)
)

# Update numeric counts
existing_data["faculty_count"] = len(existing_data["faculty"])
existing_data["department_count"] = len(existing_data["departments"])
existing_data["article_count"] = len(existing_data["titles"])
existing_data["tc_count"] = existing_data.get("tc_count", 0) + new_data.get(
"tc_count", 0
)

self.logger.debug(
f"Updated counts - Faculty: {existing_data['faculty_count']}, "
f"Departments: {existing_data['department_count']}, "
Expand Down Expand Up @@ -422,6 +416,13 @@ def run_all_process(
self.process(article_data, "article_data")
self.process(faculty_data, "faculty_data")

def fix_counts(self):
    """Recompute the stored faculty and department counts for every category.

    For each document in ``self.category_collection``, ``faculty_count`` and
    ``department_count`` are set to the lengths of that document's
    ``faculty`` and ``departments`` fields and written back with
    ``update_one``.

    Only the two count fields are sent in ``$set``. Sending the whole
    fetched document would also include ``_id`` and would overwrite every
    other field with the snapshot read at the start of the run. A missing
    or ``None`` list is counted as 0 instead of raising.

    The operation is idempotent: running it again on unchanged data writes
    the same values.
    """
    # Fetch only the fields the counts are derived from.
    # NOTE(review): assumes a PyMongo-style collection, where ``_id`` is
    # returned by default alongside the projected fields.
    projection = {"faculty": 1, "departments": 1}
    for doc in self.category_collection.find({}, projection):
        counts = {
            "faculty_count": len(doc.get("faculty") or []),
            "department_count": len(doc.get("departments") or []),
        }
        self.category_collection.update_one({"_id": doc["_id"]}, {"$set": counts})

def clear_collection(self):
"""Clear the entire collection."""
self.category_collection.delete_many({})
Expand All @@ -440,28 +441,29 @@ def close_connection(self):
load_dotenv()
mongo_url = os.getenv("MONGODB_URL")

# Handle article data
with open(
"../../data/core/output_files/test_processed_article_stats_obj_data.json", "r"
) as f:
article_data = json.load(f)

# Handle category data
with open(
"../../data/core/output_files/test_processed_category_data.json", "r"
) as f:
category_data = json.load(f)

# Handle faculty data
with open(
"../../data/core/output_files/test_processed_global_faculty_stats_data.json",
"r",
) as f:
faculty_data = json.load(f)
# # Handle article data
# with open(
# "../../data/core/output_files/test_processed_article_stats_obj_data.json", "r"
# ) as f:
# article_data = json.load(f)

# # Handle category data
# with open(
# "../../data/core/output_files/test_processed_category_data.json", "r"
# ) as f:
# category_data = json.load(f)

# # Handle faculty data
# with open(
# "../../data/core/output_files/test_processed_global_faculty_stats_data.json",
# "r",
# ) as f:
# faculty_data = json.load(f)

database = DatabaseWrapper(db_name="Site_Data", mongo_url=mongo_url)
database.clear_collection()
# database.clear_collection()

database.process(article_data, "article_data")
database.process(category_data, "category_data")
database.process(faculty_data, "faculty_data")
# database.process(article_data, "article_data")
# database.process(category_data, "category_data")
# database.process(faculty_data, "faculty_data")
database.fix_counts()

0 comments on commit f5c57fd

Please sign in to comment.