From 015399bbfac7540533071833599d9d0210f7939f Mon Sep 17 00:00:00 2001 From: Alejandro Velez-Arce Date: Thu, 22 Aug 2024 19:12:43 -0400 Subject: [PATCH] add pinnacle output metadata --- tdc/metadata.py | 20 ++++++++++++++++++++ tdc/resource/pinnacle.py | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/tdc/metadata.py b/tdc/metadata.py index abac5c01..24019db2 100644 --- a/tdc/metadata.py +++ b/tdc/metadata.py @@ -927,6 +927,16 @@ def get_task2category(): "pinnacle_protein_embed": "pth", "pinnacle_labels_dict": "txt", "panpep": "tab", + "pinnacle_output1": "zip", + "pinnacle_output2": "zip", + "pinnacle_output3": "zip", + "pinnacle_output4": "zip", + "pinnacle_output5": "zip", + "pinnacle_output6": "zip", + "pinnacle_output7": "zip", + "pinnacle_output8": "zip", + "pinnacle_output9": "zip", + "pinnacle_output10": "zip", } name2id = { @@ -1104,6 +1114,16 @@ def get_task2category(): "pinnacle_protein_embed": 10407128, "pinnacle_labels_dict": 10409635, "panpep": 10428565, + "pinnacle_output1": 10431072, + "pinnacle_output2": 10431073, + "pinnacle_output3": 10431078, + "pinnacle_output4": 10431080, + "pinnacle_output5": 10431077, + "pinnacle_output6": 10431076, + "pinnacle_output7": 10431079, + "pinnacle_output8": 10431074, + "pinnacle_output9": 10431075, + "pinnacle_output10": 10431081, } oracle2type = { diff --git a/tdc/resource/pinnacle.py b/tdc/resource/pinnacle.py index fefcab94..e06d8fd1 100644 --- a/tdc/resource/pinnacle.py +++ b/tdc/resource/pinnacle.py @@ -86,3 +86,9 @@ def get_embeds(self): x), "dims not mantained when translated to pandas. {} vs {}".format( len(df), len(x)) return df + + def get_exp_data(self, seed=1, split="train"): + if split not in ["train", "val", "test", "all"]: + raise ValueError("{} not a valid split".format(split)) + filename = "pinnacle_output{}".format(seed) +