RosettaCommons · snerligit · Sep 22, 2018
diff --git a/apps/RosettaMHC/.gitignore b/apps/RosettaMHC/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+*.pyc
+.DS_Store
diff --git a/apps/RosettaMHC/README.md b/apps/RosettaMHC/README.md
@@ -0,0 +1,20 @@
+# MHC peptide threading protocol
+
+This protocol requires the following libraries:
+
+* Python 3.x
+
+* PyRosetta4
+
+* Biopython
+
+* Clustal Omega (command line interface)
+
+* Rosetta (optional)
+
+
+A detailed description of the protocol together with commands is under construction.
+
+If you use this work, please cite:
+
+Toor JS, Rao AA, McShan AC, Yarmarkovich M, Nerli S, Yamaguchi K, Madejska AA, Nguyen S, Tripathi S, Maris JM, Salama SR, Haussler D and Sgourakis NG (2018) A Recurrent Mutation in Anaplastic Lymphoma Kinase with Distinct Neoepitope Conformations. Front. Immunol. 9:99. doi: 10.3389/fimmu.2018.00099
diff --git a/apps/RosettaMHC/alignment/align.py b/apps/RosettaMHC/alignment/align.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python
+
+#       Sgourakis Lab
+#   Author: Santrupti Nerli
+#   Date: January 26, 2017
+#   Email: [email protected]
+#
+
+'''
+
+ALIGN class contains all the necessary functionalities required to perform sequence
+alignment between the template and the target sequences.
+
+'''
+
+# additional bio libraries
+# Reference: Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega.
+# (2011 October 11) Molecular Systems Biology 7:539
+from Bio.Align.Applications import ClustalOmegaCommandline
+
+#custom libraries
+from alignment.substitution_matrix import BLOSUM
+
+# import other required libraries
+import os
+import sys
+import subprocess
+
+class ALIGN:
+    '''
+    Aligns a template sequence with a target sequence using clustal omega.
+
+    The two sequences are written as a two-record FASTA file in the current
+    working directory and handed to the externally installed clustal omega
+    program through Biopython's ClustalOmegaCommandline wrapper.
+    '''
+
+    # class members (defaults only, the constructor sets every one of them per instance)
+    template_seq = None # amino acid sequence for a given template
+    target_seq = None # amino acid sequence for the required target
+    matrix_type = 62 # matrix type to use for scoring alignment
+    clustal_input = "" # input filename for clustal omega
+    clustal_output = "" # output filename for clustal omega
+
+    # constructor
+    def __init__(self, template_seq, target_seq, matrix_type = 62):
+        '''
+        template_seq: amino acid sequence of the template (string)
+        target_seq: amino acid sequence of the target (string)
+        matrix_type: scoring matrix identifier; it is stored on the instance
+                     but not read by any method of this class
+        '''
+        self.template_seq = template_seq
+        self.target_seq = target_seq
+        self.matrix_type = matrix_type
+        # fixed file names, resolved relative to the current working directory
+        self.clustal_input = "clustal_default_input.fasta"
+        self.clustal_output = self.clustal_input+"_clustal_output.fasta"
+
+    # method to create clustal input file
+    def init_clustal_input(self):
+        '''
+        Write the template and target sequences to the clustal input file as
+        two FASTA records named "template" and "target". An existing file
+        with the same name is overwritten.
+        '''
+        # build the whole text first so that a bad sequence (e.g. None) fails
+        # before the file is opened and truncated
+        records = (">template\n" + self.template_seq + "\n" +
+                   ">target\n" + self.target_seq + "\n")
+        # the context manager closes the file even if the write fails
+        with open(self.clustal_input, "w") as writefilehandle:
+            writefilehandle.write(records)
+
+    # method that calls externally installed clustal program
+    # read the input sequences from clustal input file and write to the
+    # clustal output file
+    def clustal(self):
+        '''
+        Create the clustal input file and run clustal omega on it.
+
+        The alignment is requested in "vienna" format and written to the file
+        returned by get_clustal_output_filename(). force=True is passed on to
+        clustal omega (its --force option, which allows overwriting an
+        existing output file).
+        '''
+        self.init_clustal_input()
+        cline = ClustalOmegaCommandline(infile=self.clustal_input, outfile=self.clustal_output, distmat_full=True,
+                                        verbose=True, seqtype="Protein", outfmt="vienna", iterations=10, percentid=True, force=True)
+        # calling the wrapper object executes the external program
+        cline()
+
+    # getter method
+    def get_clustal_output_filename(self):
+        '''Return the name of the file clustal omega writes the alignment to.'''
+        return self.clustal_output
diff --git a/apps/RosettaMHC/alignment/grishin.py b/apps/RosettaMHC/alignment/grishin.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+#       Sgourakis Lab
+#   Author: Santrupti Nerli
+#   Date: January 26, 2017
+#   Email: [email protected]
+#
+
+'''
+
+GRISHIN class contains all the necessary functionalities required to create
+Rosetta specific alignment file.
+
+'''
+
+# import other required libraries
+import os
+import sys
+import subprocess
+
+class GRISHIN:
+    '''
+    Writes one target/template alignment to a Rosetta alignment file in
+    Grishin format.
+    '''
+
+    # class members (defaults only, the constructor sets every one of them per instance)
+    filename = "" # grishin file name
+    target_head = "" # target name
+    template_head = "" # template name
+    target_sequence = "" # target sequence
+    template_sequence = "" # template sequence
+
+    # constructor
+    def __init__(self, filename, target_head, template_head, target_sequence, template_sequence):
+        '''
+        filename: output file name without the ".grishin" extension
+        target_head: name of the target written to the header line
+        template_head: name of the template written to the header line
+        target_sequence: target sequence line of the alignment
+        template_sequence: template sequence line of the alignment
+        '''
+        self.filename = filename
+        self.target_head = target_head
+        self.template_head = template_head
+        self.target_sequence = target_sequence
+        self.template_sequence = template_sequence
+
+    # method to create and write to the grishin file
+    # the formatting is very specific to Rosetta
+    # See the link: https://www.rosettacommons.org/docs/latest/rosetta_basics/file_types/Grishan-format-alignment
+    def write(self, is_new = False):
+        '''
+        Write this alignment as one record of the grishin file.
+
+        is_new: when True the file is created (or overwritten); otherwise the
+                record is appended to the end of the existing file
+        '''
+        mode = "w" if is_new else "a"
+        record = ("## "+self.target_head+" "+self.template_head+"\n"+"#"+"\n" +
+                  "scores_from_program: 0\n" +
+                  "0 "+self.target_sequence+"\n" +
+                  "0 "+self.template_sequence+"\n" +
+                  "--\n")
+        # the context manager closes the file even if the write fails
+        with open(self.get_file_name(), mode) as writefile:
+            writefile.write(record)
+
+    # getter method
+    def get_file_name(self):
+        '''Return the output file name including the ".grishin" extension.'''
+        return self.filename+".grishin"
diff --git a/apps/RosettaMHC/alignment/substitution_matrix.py b/apps/RosettaMHC/alignment/substitution_matrix.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+#       Sgourakis Lab
+#   Author: Santrupti Nerli
+#   Date: Jan 16, 2017
+#   Email: [email protected]
+#
+
+'''
+
+BLOSUM class contains all the necessary functionalities required to
+obtain BLOSUM scoring matrix based on the sequence alignment identity.
+For the purposes of MHC modeling we use BLOSUM62
+
+'''
+
+# bio libraries
+from Bio.SubsMat.MatrixInfo import *
+
+# import other required libraries
+from collections import defaultdict
+
+class BLOSUM:
+    '''
+    Symmetric lookup table of BLOSUM substitution scores, extended with
+    scores for gaps (denoted by an asterisk).
+
+    After construction the score of a pair is BLOSUMX[a][b], available in
+    both orders.
+    '''
+
+    # class members
+    # the matrix itself is created per instance in the constructor; a dictionary
+    # created here would be shared (and overwritten) by every BLOSUM object
+    BLOSUMX = None # variable to store the matrix
+
+    # constructor
+    # we support only three types of matrices
+    # each with sequence identities 45%, 62% and 80%
+    def __init__(self, type=62):
+        '''
+        type: which matrix to load, one of 45, 62 (default) or 80
+
+        Raises ValueError for any other value.
+        '''
+        if type == 62:
+            blosum = blosum62
+        elif type == 80:
+            blosum = blosum80
+        elif type == 45:
+            blosum = blosum45
+        else:
+            # fail with a clear message instead of continuing with an undefined matrix
+            raise ValueError("Unknown substitution matrix: "+str(type)+" (supported: 45, 62, 80)")
+
+        # fetch the matrix and convert it to dictionary format
+        # for easier access; every pair is stored in both orders
+        matrix = defaultdict(dict)
+        for (key0, key1), score in blosum.items():
+            matrix[key0][key1] = score
+            matrix[key1][key0] = score
+
+        # provide scores to gaps denoted by asterisk:
+        # -4 against every other symbol, 1 for gap against gap
+        # (iterate over a copy of the keys because '*' is added while looping)
+        for key in list(matrix):
+            matrix['*'][key] = -4
+            matrix[key]['*'] = -4
+        matrix['*']['*'] = 1
+
+        self.BLOSUMX = matrix
+
+    # getter method
+    def get_matrix(self):
+        '''Return the nested dictionary of scores built by the constructor.'''
+        return self.BLOSUMX