Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for NetMHCstabpan (GH Issue #64) #167

Merged
merged 4 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
.hypothesis/

# Translations
*.mo
Expand Down
Binary file added .hypothesis/unicode_data/13.0.0/charmap.json.gz
Binary file not shown.
Binary file added .hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions mhctools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .netmhc_pan41 import NetMHCpan41, NetMHCpan41_BA, NetMHCpan41_EL
from .netmhcii_pan import NetMHCIIpan, NetMHCIIpan3, NetMHCIIpan4, NetMHCIIpan4_BA, NetMHCIIpan4_EL
from .random_predictor import RandomBindingPredictor
from .netmhcstabpan import NetMHCstabpan
from .unsupported_allele import UnsupportedAllele

__version__ = "1.9.0"
Expand Down Expand Up @@ -54,6 +55,7 @@
"NetMHCIIpan4",
"NetMHCIIpan4_BA",
"NetMHCIIpan4_EL",
"NetMHCstabpan",
"RandomBindingPredictor",
"UnsupportedAllele",
]
45 changes: 45 additions & 0 deletions mhctools/netmhcstabpan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .base_commandline_predictor import BaseCommandlinePredictor
from .parsing import parse_netmhcstabpan

class NetMHCstabpan(BaseCommandlinePredictor):
def __init__(
self,
alleles,
default_peptide_lengths=[],
program_name="netMHCstabpan",
process_limit=-1,
flags=[]):
"""
Wrapper for NetMHCstabpan.
"""
BaseCommandlinePredictor.__init__(
self,
program_name=program_name,
alleles=alleles,
default_peptide_lengths=default_peptide_lengths,
parse_output_fn=parse_netmhcstabpan,
supported_alleles_flag="-listMHC",
input_file_flag="-f",
length_flag="-l",
allele_flag="-a",
extra_flags=flags,
process_limit=process_limit)

def predict_peptides(self, peptides):
peptide_lengths = set(len(p) for p in peptides)
if len(peptide_lengths) > 1:
raise ValueError("All peptides must be the same length")
return super().predict_peptides(peptides)

43 changes: 43 additions & 0 deletions mhctools/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,46 @@ def parse_netmhciipan4_stdout(
rank_index=8 if mode == "elution_score" else 12,
score_index=7 if mode == "elution_score" else 10,
transforms=transforms)

def parse_netmhcstabpan(
stdout,
prediction_method_name="netmhcstabpan",
sequence_key_mapping=None,
mode=None):
"""
# NetMHCstabpan version 1.0

# Input is in PEPTIDE format

HLA-A02:01 : Distance to traning data 0.000 (using nearest neighbor HLA-A02:01)

# Rank Threshold for Strong binding peptides 0.500
# Rank Threshold for Weak binding peptides 2.000
-----------------------------------------------------------------------------------------------------
pos HLA peptide Identity Pred Thalf(h) %Rank_Stab BindLevel
-----------------------------------------------------------------------------------------------------
0 HLA-A*02:01 AAAAAAAAAA PEPLIST 0.075 0.27 19.00
-----------------------------------------------------------------------------------------------------

Protein PEPLIST. Allele HLA-A*02:01. Number of high binders 0. Number of weak binders 0. Number of peptides 1

-----------------------------------------------------------------------------------------------------
"""

# the offset specified in "pos" (at index 0) is 1-based instead of 0-based. we adjust it to be
# 0-based, as in all the other netmhc predictors supported by this library.
transforms = {
0: lambda x: int(x) - 1,
}
return parse_stdout(
stdout=stdout,
prediction_method_name=prediction_method_name,
sequence_key_mapping=sequence_key_mapping,
key_index=3,
offset_index=0,
peptide_index=2,
allele_index=1,
score_index=5,
rank_index=6,
transforms=transforms)

6 changes: 6 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ def eq_(x, y, msg=None):
else:
assert x == y, msg

def approx_(x, y, tol=1e-6, msg=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Want to oblige @timodonnell and call numpy.testing.assert_allclose here?

if msg is None:
assert abs(x - y) < tol
else:
assert abs(x - y) < tol, msg

def neq_(x, y, msg=None):
if msg is None:
assert x != y
Expand Down
62 changes: 62 additions & 0 deletions tests/test_netmhc_stabpan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .common import approx_
from mhctools import NetMHCstabpan


DEFAULT_ALLELE = 'HLA-A*02:01'

# Protein sequences to test with against known values of netMHCstabpan web server.
protein_sequences = [
"ALDKNLHQL",
"ALHEEVVCV",
"ALPPTVYEV",
"AVLGSFSYV",
"EMASVLFKA",
]

web_server_predictions = [
4.6393,
10.6011,
11.8201,
4.6722,
1.8404,
]

# REMINDER: a program called "netMHCstabpan" MUST be installed and working for
# this test suite to succeeed. Also all peptides must be the same length.


def test_netmhc_stabpan_accuracy():
# Check that the netMHCstabpan program is working and returning th eexpected outputs.
predictor = NetMHCstabpan(
alleles=[DEFAULT_ALLELE], program_name='netMHCstabpan'
)

binding_predictions = predictor.predict_peptides(protein_sequences)
stability_predictions = [p.score for p in binding_predictions]
rank_predictions = [p.percentile_rank for p in binding_predictions]

assert len(web_server_predictions) == len(binding_predictions)
assert len(stability_predictions) == len(binding_predictions)

for prank in rank_predictions:
# Make sure that correct mapping is done by checking percentiles aren't above 100.
assert prank < 100

for i, (expected, actual) in enumerate(zip(web_server_predictions, stability_predictions)):
# Check to make sure that the stability predictions are within 0.01 of the webserver values.
# This could be the result of different versions of dependencies or the nature of the ANN itself.
approx_(expected, actual, tol=0.01, msg="Peptide %d: expected %f but got %f" % (i, expected, actual))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest using numpy.testing.assert_allclose here rather than writing your own function

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dhuvik I've been mercilessly rolling my own testing helpers so might be setting a bad precedent but agree with Tim

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Tim! Will go ahead and make that change



Loading