// Provenance: seq_ops.c, from a fork of makovalab-psu/NoiseCancellingRepeatFinder.
// seq_ops.c-- operations on nucleotide sequences
#include <stdlib.h>
#define true 1
#define false 0
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <inttypes.h>
#include <math.h>
#include <float.h>
#define seq_ops_owner // (make this the owner of its globals)
#include "seq_ops.h" // interface to this module
//----------
//
// reverse_complement--
// Reverse-complement a nucleotide sequence, in place.
//
// Complementation here handles ACGT and all IUPAC ambiguity codes. Upper
// and lower case are preserved. All non-DNA characters are left unchanged
// (but are positionally reversed).
//
//----------
//
// Arguments:
// u8* nt: The zero-terminated sequence to reverse-complement.
//
// Returns:
// (nothing)
//
//----------
// nukeToComplement--
// Lookup table mapping a byte value to its nucleotide complement; it is
// indexed directly by the character code (0..255) and is consulted by
// reverse_complement().
//
// The IUPAC letters A B C D G H K M N R S T V W Y are mapped to their
// complements (A<->T, C<->G, B<->V, D<->H, K<->M, R<->Y; N, S and W map to
// themselves), with separate upper case (rows 4x,5x) and lower case
// (rows 6x,7x) entries so that case is preserved. Every other byte value,
// including 'U'/'u', maps to itself.
const u8 nukeToComplement[256] = // assumes upper/lower iupac code
{
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, // 0x
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, // 1x
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, // 2x
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, // 3x (numbers)
0x40,'T', 'V', 'G', 'H', 0x45,0x46,'C', 'D', 0x49,0x4A,'M', 0x4C,'K', 'N' ,0x4F, // 4x (upper case)
0x50,0x51,'Y', 'S', 'A', 0x55,'B', 'W', 0x58,'R', 0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, // 5x (upper case)
0x60,'t', 'v', 'g', 'h', 0x65,0x66,'c', 'd', 0x69,0x6a,'m', 0x6c,'k', 'n' ,0x6f, // 6x (lower case)
0x70,0x71,'y', 's', 'a', 0x75,'b', 'w', 0x78,'r', 0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, // 7x (lower case)
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, // 8x
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, // 9x
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, // Ax
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, // Bx
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, // Cx
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, // Dx
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, // Ex
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF // Fx
};
// reverse_complement (implementation)--
// Reverses the zero-terminated sequence nt in place while replacing every
// character with its entry in nukeToComplement[]. A NULL or empty sequence
// is left untouched.
//
// The unprocessed region is tracked as the half-open span [left,right), so
// neither pointer is ever moved before nt or more than one past the last
// character; this keeps all pointer arithmetic and comparisons well defined,
// including for empty and odd-length sequences.
void reverse_complement
(u8* nt)
{
    u8* left;
    u8* right;
    u8  nuke;

    if (nt == NULL) return;                 // nothing to do

    // find the terminator; right ends up one past the last character
    for (right=nt ; *right!=0 ; right++)
        ;

    left = nt;
    while (left < right)
    {
        right--;                            // now the last unprocessed char
        nuke   = nukeToComplement[*left ];
        *left  = nukeToComplement[*right];  // for odd length, the middle
        *right = nuke;                      // .. char has left==right here;
        left++;                             // .. the second store wins and
    }                                       // .. leaves it complemented once
}
//----------
//
// unmask_sequence--
// Remove soft-masking in a sequence, by converting any lowercase nucleotides
// to uppercase.
//
//----------
//
// Arguments:
// u8* nt: The zero-terminated sequence to unmask.
//
// Returns:
// (nothing)
//
//----------
void unmask_sequence
(u8* nt)
{
    // Walk the zero-terminated sequence, rewriting each character in place
    // with whatever toUpperACGTN() yields for it.
    // NOTE(review): toUpperACGTN is defined outside this file view; judging
    // by its name it presumably upper-cases only a/c/g/t/n -- confirm in
    // seq_ops.h.
    u8* cursor = nt;

    while (*cursor != 0)
    {
        *cursor = toUpperACGTN(*cursor);
        cursor++;
    }
}