Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions apps/RosettaMHC/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__pycache__
*.pyc
.DS_Store
20 changes: 20 additions & 0 deletions apps/RosettaMHC/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# MHC peptide threading protocol

This protocol requires the following libraries:

* Python3.x

* PyRosetta4

* Biopython

* Clustal Omega (command line interface)

* Rosetta (optional)


A detailed description of the protocol together with commands is under construction.

If you use this work, please cite:

Toor JS, Rao AA, McShan AC, Yarmarkovich M, Nerli S, Yamaguchi K, Madejska AA, Nguyen S, Tripathi S, Maris JM, Salama SR, Haussler D and Sgourakis NG (2018) A Recurrent Mutation in Anaplastic Lymphoma Kinase with Distinct Neoepitope Conformations. Front. Immunol. 9:99. doi: 10.3389/fimmu.2018.00099
66 changes: 66 additions & 0 deletions apps/RosettaMHC/alignment/align.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/python

# Sgourakis Lab
# Author: Santrupti Nerli
# Date: January 26, 2017
# Email: [email protected]
#

'''

ALIGN class contains all the necessary functionalities required to perform sequence
alignment between the template and the target sequences.

'''

# additional bio libraries
# Reference: Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega.
# (2011 October 11) Molecular systems biology 7 :539
from Bio.Align.Applications import ClustalOmegaCommandline

#custom libraries
from alignment.substitution_matrix import BLOSUM

# import other required libraries
import os
import sys
import subprocess

class ALIGN:
    """Align a template sequence against a target sequence with Clustal Omega.

    The two sequences are written to a FASTA file in the current working
    directory and the external Clustal Omega program is invoked on that file
    through Biopython's ``ClustalOmegaCommandline`` wrapper.
    """

    # class members
    template_seq = None  # amino acid sequence for a given template
    target_seq = None  # amino acid sequence for the required target
    matrix_type = 62  # matrix type to use for scoring alignment
    clustal_input = ""  # input filename for clustal omega
    clustal_output = ""  # output filename for clustal omega

    # constructor
    def __init__(self, template_seq, target_seq, matrix_type = 62):
        """Store the sequences and derive the Clustal input/output file names.

        Args:
            template_seq: amino acid sequence of the template, as a string.
            target_seq: amino acid sequence of the target, as a string.
            matrix_type: substitution matrix identifier; it is only stored on
                the instance, no method of this class reads it.
        """
        self.template_seq = template_seq
        self.matrix_type = matrix_type
        self.target_seq = target_seq
        self.clustal_input = "clustal_default_input.fasta"
        # the output name is derived from the input name, so it keeps the
        # ".fasta" of the input in the middle of the file name
        self.clustal_output = self.clustal_input+"_clustal_output.fasta"

    # method to create clustal input file
    def init_clustal_input(self):
        """Write both sequences to ``self.clustal_input`` in FASTA format.

        The records are named ``template`` and ``target``. An existing file
        with the same name is overwritten.
        """
        # the context manager closes the handle even when a write fails
        with open(self.clustal_input, "w") as fasta_handle:
            fasta_handle.write(">template\n")
            fasta_handle.write(self.template_seq+"\n")
            fasta_handle.write(">target\n")
            fasta_handle.write(self.target_seq+"\n")

    # method that calls externally installed clustal program
    # read the input sequences from clustal input file and write to the
    # clustal output file
    def clustal(self):
        """Create the input file, then run Clustal Omega on it.

        The alignment is written to ``self.clustal_output``; whatever the
        command wrapper returns is discarded.
        """
        self.init_clustal_input()
        cline = ClustalOmegaCommandline(infile=self.clustal_input, outfile=self.clustal_output, distmat_full=True,
                                        verbose=True, seqtype="Protein", outfmt="vienna", iterations=10, percentid=True, force=True)
        cline()

    # getter method
    def get_clustal_output_filename(self):
        """Return the name of the file Clustal Omega writes the alignment to."""
        return self.clustal_output
55 changes: 55 additions & 0 deletions apps/RosettaMHC/alignment/grishin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/python

# Sgourakis Lab
# Author: Santrupti Nerli
# Date: January 26, 2017
# Email: [email protected]
#

'''

GRISHIN class contains all the necessary functionalities required to create
Rosetta specific alignment file.

'''

# import other required libraries
import os
import sys
import subprocess

class GRISHIN:
    """Writer for a Rosetta Grishin-format alignment file.

    One instance holds a single target/template pair; ``write`` emits that
    pair as one alignment entry into ``<filename>.grishin``.
    """

    # class members
    filename = ""  # grishin file name
    target_head = ""  # target name
    template_head = ""  # template name
    target_sequence = ""  # target sequence
    template_sequence = ""  # template sequence

    # constructor
    def __init__(self, filename, target_head, template_head, target_sequence, template_sequence):
        """Store the output file stem and the alignment entry to write.

        Args:
            filename: output path without the ".grishin" extension.
            target_head: target name written on the "##" header line.
            template_head: template name written on the "##" header line.
            target_sequence: target sequence line of the entry.
            template_sequence: template sequence line of the entry.
        """
        self.filename = filename
        self.target_head = target_head
        self.template_head = template_head
        self.target_sequence = target_sequence
        self.template_sequence = template_sequence

    # method to create and write to the grishin file
    # the formatting is very specific to Rosetta
    # See the link: https://www.rosettacommons.org/docs/latest/rosetta_basics/file_types/Grishan-format-alignment
    def write(self, is_new = False):
        """Write this alignment entry to the grishin file.

        Args:
            is_new: when true the file is created or truncated first;
                otherwise the entry is appended to the existing content.
        """
        mode = "w" if is_new else "a"
        # the context manager closes the handle even when a write fails
        with open(self.get_file_name(), mode) as grishin_handle:
            grishin_handle.write("## "+self.target_head+" "+self.template_head+"\n"+"#"+"\n")
            grishin_handle.write("scores_from_program: 0\n")
            grishin_handle.write("0 "+self.target_sequence+"\n")
            grishin_handle.write("0 "+self.template_sequence+"\n")
            grishin_handle.write("--\n")

    # getter method
    def get_file_name(self):
        """Return the output path, i.e. the stored stem plus ".grishin"."""
        return self.filename+".grishin"
61 changes: 61 additions & 0 deletions apps/RosettaMHC/alignment/substitution_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/python

# Sgourakis Lab
# Author: Santrupti Nerli
# Date: Jan 16, 2017
# Email: [email protected]
#

'''

BLOSUM class contains all the necessary functionalities required to
obtain BLOSUM scoring matrix based on the sequence alignment identity.
For the purposes of MHC modeling we use BLOSUM62

'''

# bio libraries
from Bio.SubsMat.MatrixInfo import *

# import other required libraries
from collections import defaultdict

class BLOSUM:
    """BLOSUM substitution scores as a nested dictionary, with gap scores.

    The matrix is stored symmetrically, so ``matrix[a][b]`` and
    ``matrix[b][a]`` both resolve. The gap symbol '*' is added with a score
    of -4 against every residue and 1 against itself.
    """

    # class members
    # Kept at class level for backward compatibility only. Each instance
    # binds its own table in __init__, so building a second matrix no longer
    # overwrites the scores held by an earlier instance.
    BLOSUMX = defaultdict(dict)  # variable to store the matrix

    # constructor
    # we support only three types of matrices
    # each with sequence identities 45%, 62% and 80%
    def __init__(self, type=62):
        """Build the score table for the requested BLOSUM matrix.

        Args:
            type: 45, 62 or 80, selecting blosum45, blosum62 or blosum80
                (names provided by the module-level imports of this file).

        Raises:
            ValueError: if ``type`` is not one of the supported values.
        """
        # an if/elif chain (not a lookup table) so that only the selected
        # matrix name is evaluated
        if type == 62:
            blosum = blosum62
        elif type == 80:
            blosum = blosum80
        elif type == 45:
            blosum = blosum45
        else:
            # previously this only printed the message and then failed with
            # an UnboundLocalError on the first use of `blosum`
            raise ValueError("Unknown substitution matrix")

        # fresh table per instance instead of mutating the shared class one
        self.BLOSUMX = defaultdict(dict)

        # fetch the matrix and convert it to dictionary format
        # for easier access
        for pair, score in blosum.items():
            first, second = pair[0], pair[1]
            self.BLOSUMX[first][second] = score
            self.BLOSUMX[second][first] = score

        # provide scores to gaps denoted by asterisk
        # snapshot the keys first: touching self.BLOSUMX['*'] inserts a new
        # key, which is not allowed while iterating the dictionary itself
        for residue in list(self.BLOSUMX):
            self.BLOSUMX['*'][residue] = -4
            self.BLOSUMX[residue]['*'] = -4
        self.BLOSUMX['*']['*'] = 1

    # getter method
    def get_matrix(self):
        """Return the nested ``{residue: {residue: score}}`` table."""
        return self.BLOSUMX
Loading