-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathaa_weight.c
146 lines (126 loc) · 14.6 KB
/
aa_weight.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
//
// aa_weight.c
// VVP_dev_xcode
//
// Created by STEVEN FLYGARE on 10/11/16.
// Copyright © 2016 IDbyDNA. All rights reserved.
//
#include "aa_weight.h"
#define NUM_KEYS 231
#define CONS 1.4121
#define UNCONS 0.3022
static struct aa_matrix * aam;
void init_aa_score(){
//initialize amino acid substitution scores. based on matrix0.13.log
char * keys[NUM_KEYS] = { "*0X","A0D","A0E","A0G","A0M","A0N","A0P","A0R","A0S","A0T","A0V","C0F","C0G","C0H","C0L","C0R","C0S","C0W","C0Y","D0A","D0E","D0G","D0H","D0N","D0V","D0Y","E0A","E0D","E0G","E0K","E0L","E0N","E0Q","E0V","F0C","F0G","F0I","F0L","F0S","F0V","F0Y","G0A","G0C","G0D","G0E","G0K","G0M","G0R","G0S","G0T","G0V","G0W","H0C","H0D","H0L","H0N","H0P","H0Q","H0R","H0Y","I0F","I0K","I0L","I0M","I0N","I0R","I0S","I0T","I0V","K0A","K0D","K0E","K0I","K0M","K0N","K0Q","K0R","K0S","K0T","L0A","L0F","L0H","L0I","L0M","L0P","L0Q","L0R","L0S","L0V","L0W","M0I","M0K","M0L","M0R","M0T","M0V","N0D","N0E","N0H","N0I","N0K","N0S","N0T","N0Y","P0A","P0F","P0H","P0L","P0Q","P0R","P0S","P0T","Q0E","Q0H","Q0K","Q0L","Q0P","Q0R","Q0W","R0C","R0D","R0G","R0H","R0I","R0K","R0L","R0M","R0P","R0Q","R0S","R0T","R0W","S0A","S0C","S0F","S0G","S0I","S0L","S0N","S0P","S0R","S0T","S0W","S0Y","T0A","T0I","T0K","T0L","T0M","T0N","T0P","T0R","T0S","T0V","V0A","V0D","V0E","V0F","V0G","V0I","V0K","V0L","V0M","V0P","W0C","W0G","W0L","W0Q","W0R","W0S","X0*","Y0C","Y0D","Y0F","Y0H","Y0L","Y0N","Y0Q","Y0R","Y0S","del-12","del-15","del-18","del-21","del-24","del-27","del-3","del-30","del-33","del-36","del-39","del-42","del-45","del-48","del-51","del-54","del-57","del-6","del-60","del-63","del-66","del-69","del-72","del-75","del-78","del-9","ins-12","ins-15","ins-18","ins-21","ins-24","ins-27","ins-3","ins-30","ins-33","ins-36","ins-39","ins-6","ins-9","splice-a1-A","splice-a1-C","splice-a1-T","splice-a2-A","splice-a2-C","splice-a2-G","splice-b1-C","splice-b1-G","splice-b1-T","splice-b2-A","splice-b2-C","splice-b2-T" };
float scores[NUM_KEYS] = { 0.91359891142,1.84077794036,1.01098024651,0.350309375974,0.268054671093,3.32067726877,0.900077768437,3.15582095046,0.514060482919,0.540349794826,0.628795059769,2.09327639657,1.22000954491,5.7047532566,29.2743917115,0.595969916709,1.17699832069,0.986631383625,1.65452817547,0.684044202943,0.284371787196,0.93951194795,2.20525153421,1.4316783812,5.58167027113,3.02524793911,0.708654740721,0.330933515822,0.45467038898,2.40831376054,3.39154538121,16.8976235702,0.320902013172,1.53326882958,3.05351440285,2.03182992701,0.702047948101,0.65576453956,0.97621698662,1.1006862319,0.384406382773,0.400694060346,5.1693641127,2.57718986732,1.58030315672,43.6245837269,3.5884738227,2.45518665108,1.13587458176,3.85256064082,3.07938238072,2.35019764444,1.30309822248,0.633139945951,2.13287359624,0.39439020963,0.569328327198,0.378744562026,0.314781476958,0.716827634337,1.96573425468,2.86052627581,0.426846207364,0.50089173886,2.00103437015,1.89833939426,1.14017104718,0.660969407995,0.124905573926,1.35661815248,5.95459646492,0.501852900841,1.78523873226,0.937057916238,0.865792905897,0.448947142995,0.259542241486,12.7134501147,0.631500094397,19.3465545224,0.544537306052,0.699011507653,0.262519618888,0.309163611304,1.20350937887,1.1077402915,2.35289503476,0.30675297571,0.374405222367,3.04311305937,0.579552164832,3.12521265361,0.252632903491,6.19289193732,0.478336569493,0.298365415566,0.252141042725,9.14323467153,0.627308393818,1.16637157016,1.27824346828,0.525281931208,0.447002583942,1.15928116547,0.329023036095,4.36245837269,0.619017974516,1.104099356,0.952536782294,1.36239714355,0.57534565594,0.756575508116,0.408673148047,0.323726616874,0.488357458773,0.666849903837,1.33327792137,0.19037761025,12.3602987226,1.97731301247,38.3595477599,0.547403664788,1.20604622311,1.12591127563,0.18436317034,2.48919977427,0.27724034518,1.69508363222,1.01542566911,1.27602467553,0.670858341181,2.94010613985,0.0764948863701,0.683120797587,1.95994506599,0.141911296461,0.781564556232,1.62328386658,0.176043695858,0.379283579151,0.896810696342,0.193165598642,1.43143165354,1.24343124874,0.172074786805,0.641748865424,0.461683704103,0.841154544451,1.04483261198,0.43243027601,0.438703560296,1.21030361191,0.162096909993,13.905336063,0.182237547843,2.77551618023,2.86412689247,1.91502937479,0.609672706708,0.133753806045,24.7205974453,0.403390502245,0.852928702808,3.07583470517,2.61747502362,2.21746222931,0.764554560163,15.0327957437,0.618329826321,4.1469781362,27.6429358646,2.0572278011,1.60639261377,0.418993177038,0.653887955936,21.1890835245,4.47411178625,49.4411948905,42.9357745102,0.712918388241,11.9298578,4.252337046,9.9527436,10.7235,11.6106,12.4977,3.941480407,13.3848,14.2719,15.159,16.0461,16.9332,17.8203,18.7074,19.5945,20.4816,21.3687,4.033682552,22.2558,23.1429,24.03,24.9171,25.8042,26.6913,27.5784,11.59869346,6.7228767,5.192735791,17.91493848,15.7824,17.7768,19.7712,2.016674044,21.7656,23.76,25.7544,27.7488,5.507683676,15.46012184,19.20532627,7.890694571,27.59342384,13.41412178,8.094265101,7.136069027,10.05920958,13.70509544,7.857813164,7.107669134,15.21498419,11.54494088 };
float conserved[NUM_KEYS] = { 2.77861236573,3.69595112473,2.78158289687,0.650172837121,0.268054671093,3.32067726877,2.94366286187,3.15582095046,0.801800546955,0.996750910768,1.17936028122,5.28861213069,3.51612494445,5.7047532566,29.2743917115,1.50011111282,2.6192361698,1.74958858425,4.43830835802,1.57645466633,0.547517775352,1.70012194995,4.05676423293,2.28496774448,10.4578666008,6.36556517102,1.15988760881,0.539812097288,0.781150046362,4.52562300025,3.39154538121,16.8976235702,0.83872418366,4.57995969396,4.24410362144,2.03182992701,1.87862836538,1.44337630759,2.27561120922,2.89599141958,0.598952603607,0.785166678153,11.5248261708,5.07971425289,3.47003887511,43.6245837269,3.5884738227,6.27864605808,2.33286352514,3.85256064082,5.88858772202,4.08457573855,1.30309822248,1.99366375903,4.27453456609,0.793257819723,1.04526264091,0.684796430406,0.765466968588,1.73951873277,2.10305541659,2.86052627581,0.558609035575,0.842946508963,4.18451830221,3.30803100929,5.76840579829,1.34484620972,0.216933893948,1.35661815248,5.95459646492,0.959635405612,11.3263137705,1.79808858652,1.31027456561,0.798991869633,0.372756919483,12.7134501147,1.21079572772,19.3465545224,0.911324995336,1.36707373523,0.420177441363,0.404388114374,3.36859789134,2.23004033896,9.02777667655,0.727321349074,0.822596417388,10.6728140247,1.03529186134,6.20224892768,0.48968359006,11.4929802538,1.39124528104,0.636172501084,0.57484859255,9.14323467153,0.698911371496,2.59927831437,2.55393837991,0.801242646765,0.80015118965,5.28051417656,0.671230287277,4.36245837269,1.40169277902,2.37751396019,2.18064236834,3.3635339451,1.07747697859,1.30535739741,0.677419532467,0.545735892564,0.569376723886,2.09857486554,4.87592776893,0.332295194373,12.3602987226,3.97373539982,38.3595477599,1.76860483365,2.55051363,5.42844359549,0.305771298896,5.46796797706,0.27724034518,4.3981302265,1.73842692187,3.19768032726,1.40173472301,7.35966034138,0.147951304983,1.11108682406,4.66422394496,0.324122205778,1.17254681829,3.10503111267,0.270778652712,0.652075058202,2.0670264358,0.46743997153,2.79501156481,1.95778903806,0.373368631869,1.1592740375,1.02464492758,0.841154544451,2.06724076149,0.631308798028,1.0054055368,2.75016788198,0.282033600176,13.905336063,0.423548044207,5.8255335952,10.7434504572,5.23293943858,0.780226626075,0.185328912249,24.7205974453,0.6887903473,1.62309356602,3.00364520084,9.1069758573,4.00530975742,0.764554560163,15.0327957437,1.65123745094,17.1436329842,67.3013534151,6.16661163522,3.60134962738,0.658934662783,1.17161469105,21.1890835245,25.9883106502,49.4411948905,42.9357745102,1.55378185295,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 };
float unconserved[NUM_KEYS] = { 0.390219892002,0.812354782563,0.449286828113,0.183510042772,0.268054671093,3.32067726877,0.290536148385,3.15582095046,0.340439484732,0.275372722663,0.323569561891,0.708975287349,0.422714655564,5.7047532566,29.2743917115,0.22538690233,0.517322307406,0.394804713904,0.532621986266,0.31449593193,0.119266157772,0.388794499587,0.910505376573,0.865863160753,1.87832674331,1.27876960364,0.410204960454,0.166926769783,0.18642367978,1.21735790145,3.39154538121,16.8976235702,0.0911765864685,0.678079059572,1.578662795,2.03182992701,0.210313270251,0.184968941502,0.331568447668,0.353162445208,0.143651750525,0.136036478297,2.69860533082,1.02353162002,0.619292405291,43.6245837269,3.5884738227,0.979433338108,0.473256910936,3.85256064082,1.00364229821,0.948397953033,1.30309822248,0.237783373244,0.92814544074,0.187308458967,0.249322121843,0.136406961359,0.122784637955,0.266861011778,1.83082880332,2.86052627581,0.288037334269,0.165037084637,0.406503002964,1.02117973335,0.295791761021,0.253096076313,0.0582045529163,1.35661815248,5.95459646492,0.249539811041,0.832524786768,0.268632678977,0.418493902095,0.259713536399,0.125625685969,12.7134501147,0.276515832231,19.3465545224,0.210324274926,0.392462633442,0.187260114159,0.20657083957,0.408825534425,0.307800775835,0.899060319949,0.133787289883,0.164875388753,0.455947915141,0.245787934668,0.700601807416,0.0966252358557,1.55736324119,0.133534201284,0.088448145875,0.110750572984,9.14323467153,0.561742068702,0.270374767125,0.559169839753,0.247855443989,0.0969398877746,0.257397197433,0.125016641884,4.36245837269,0.27163063577,0.541802447888,0.425975847444,0.617146779204,0.262365993093,0.368982891804,0.205297857359,0.146289369473,0.368624684222,0.125170966658,0.310910943188,0.0913996058811,12.3602987226,0.736741174529,38.3595477599,0.156134012168,0.470085908769,0.328116905075,0.0666672201166,0.707678204804,0.27724034518,0.705475906971,0.425702085516,0.40261165342,0.233143554121,1.04701020896,0.0290437561938,0.335318683611,0.469242492337,0.0590635233874,0.483411059634,0.691042486655,0.0928225665578,0.176188413213,0.353220366567,0.0494178294865,0.585235802956,0.496258152798,0.0820030090174,0.314612477149,0.104240921658,0.841154544451,0.535053673856,0.28516973357,0.18638319452,0.468461345863,0.0663344666187,13.905336063,0.0806901979333,0.894868489532,0.737606912985,0.585102079237,0.413716387905,0.0870871717132,24.7205974453,0.200274410234,0.422973772401,3.2831698866,0.432116314185,0.623759854226,0.764554560163,15.0327957437,0.196710984652,0.494583125784,10.1449016233,0.70516791894,0.432951936813,0.195740296452,0.262507425604,21.1890835245,1.56917173613,49.4411948905,42.9357745102,0.188188039933,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 };
aam = NULL;
int i = 0;
for (i = 0; i < NUM_KEYS; i++) {
struct aa_matrix * new_aa = (struct aa_matrix *)malloc(sizeof(struct aa_matrix));
memset(new_aa->aa_change,'\0',20);
strcpy(new_aa->aa_change, keys[i]);
new_aa->score = scores[i];
new_aa->cons = conserved[i];
new_aa->uncons = unconserved[i];
HASH_ADD_STR(aam, aa_change, new_aa);
}
}
void get_aaw(struct transcript_anno_info ** ttai, sds ref, sds var, float phast){
(*ttai)->aaw = 1.0; //make sure aa is set to 1.0 before finding appropriate weight
//check for splice terms (sequence ontology terms); most severe substitution score is assigned
struct aa_matrix * taa = NULL;
int i;
for (i=0; i < (*ttai)->anno_tags.n; i++) {
if (strcmp(kv_A((*ttai)->anno_tags, i), "splice_donor_variant") == 0) {
HASH_FIND_STR(aam, "splice-a1-T", taa);
if (taa != NULL) {
(*ttai)->aaw = 1.0 / taa->score;
(*ttai)->coding = 1;
}
return;
}
}
for (i=0; i < (*ttai)->anno_tags.n; i++) {
if (strcmp(kv_A((*ttai)->anno_tags, i), "splice_acceptor_variant") == 0) {
HASH_FIND_STR(aam, "splice-b2-T", taa);
if (taa != NULL) {
(*ttai)->aaw = 1.0 / taa->score;
(*ttai)->coding = 1;
}
return;
}
}
if ((*ttai)->pref == NULL) {
return;
}
//check for non-synonymous change
size_t tmpl_pref = sdslen((*ttai)->pref);
size_t tmpl_pvar = sdslen((*ttai)->pvar);
if (strncmp((*ttai)->pref, (*ttai)->pvar, tmpl_pref < tmpl_pvar ? tmpl_pref : tmpl_pvar) != 0 || tmpl_pref != tmpl_pvar) {
(*ttai)->coding = 1;
int diff = abs((int)sdslen(ref) - (int)sdslen(var));
if (diff > 0) { //get aa score for indel
char pchange[20];
memset(pchange, '\0', 20*sizeof(char));
if (sdslen(ref) < sdslen(var)) { //insertion
if (diff%3 == 0 && diff <= 39) {
sprintf(pchange, "ins-%d", diff);
}
else {
sprintf(pchange, "ins-39");
}
HASH_FIND_STR(aam, pchange, taa);
if (taa != NULL) {
(*ttai)->aaw = 1.0 / taa->score;
}
}
else { //deletion
if (diff%3 == 0 && diff <= 78) {
sprintf(pchange, "del-%d", diff);
}
else {
sprintf(pchange, "del-78");
}
HASH_FIND_STR(aam, pchange, taa);
if (taa != NULL) {
(*ttai)->aaw = 1.0 / taa->score;
}
}
}
else { //get aa score for snv
if (sdslen((*ttai)->pref) > 1 || sdslen((*ttai)->pvar) > 1) { //skip if amino acid is longer than 1, can happen on multiallelic site
fprintf(stderr, "WARNING: AAW set to 1.0 because annotated protein variant > 1\n");
return;
}
char pchange[5];
memset(pchange, '\0', 5*sizeof(char));
if (strcmp((*ttai)->pref, "*") == 0) {
sprintf(pchange, "*0X");
}
else if (strcmp((*ttai)->pvar, "*") == 0){
sprintf(pchange, "X0*");
}
else {
sprintf(pchange, "%s0%s", (*ttai)->pref, (*ttai)->pvar);
}
HASH_FIND_STR(aam, pchange, taa);
if (taa != NULL) {
if (phast >= 0) {
(*ttai)->aaw = 1.0 / ( ((1.0 - phast) * taa->uncons) + (phast * taa->cons) );
}
else {
(*ttai)->aaw = 1.0 / taa->score;
}
}
}
return;
}
//use phastcons score if synonymous or noncoding
if (phast >= 0) {
(*ttai)->aaw = 1.0 / ( ((1.0 - phast) * UNCONS) + (phast * CONS) );
return;
}
}