Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions htslib/sam.h
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,60 @@ ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, size_t *
HTSLIB_EXPORT
ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b);

/// Unpack nibble-encoded sequence into a char buffer
/** @param dest Destination character buffer
@param seq Pointer to sequence bases packed as per bam1_t
@param seqlen Number of bases at @p seq to be unpacked

Unpacks nucleotides encoded in the (seqlen+1)/2 bytes at @p seq.
Writes exactly @p seqlen characters to @p dest. If NUL-termination
is desired, the calling code should add '\0' appropriately itself.
*/
HTSLIB_EXPORT
void sam_format_seq(char *dest, const uint8_t *seq, size_t seqlen);

/// Unpack BAM record's sequence field into a char buffer
/** @param dest Destination character buffer
@param b Source alignment record
*/
static inline void bam_format_seq(char *dest, const bam1_t *b) {
sam_format_seq(dest, bam_get_seq(b), b->core.l_qseq);
}

/// Decode base qualities into a char buffer
/** @param dest Destination character buffer
@param qual Pointer to base qualities encoded as per bam1_t
@param quallen Number of base qualities at @p seq to be decoded

Decodes base qualities encoded in the @p quallen bytes at @p seq.
Writes exactly @p quallen characters to @p dest. If the base qualities
are missing (encoded as '\xff'), writes '*' followed by quallen-1 NULs.
Otherwise if NUL-termination is desired, the calling code should add
'\0' appropriately itself.
*/
HTSLIB_EXPORT
void sam_format_qual(char *dest, const uint8_t *qual, size_t quallen);

/// Decode BAM record's base qualities field into a char buffer
/** @param dest Destination character buffer
@param b Source alignment record
*/
static inline void bam_format_qual(char *dest, const bam1_t *b) {
sam_format_qual(dest, bam_get_qual(b), b->core.l_qseq);
}

/// Pack ASCII sequence string into nibble-encoded sequence buffer
/** FIXME
*/
HTSLIB_EXPORT
int sam_parse_seq(char *seq, const uint8_t *src, size_t len);

/// Encode ASCII base qualities into uint8_t buffer
/** FIXME
*/
HTSLIB_EXPORT
int sam_parse_qual(uint8_t *qual, const char *src, size_t len);

/*************************
*** BAM/CRAM indexing ***
*************************/
Expand Down
45 changes: 45 additions & 0 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -2683,6 +2683,7 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b)
// Basic version which operates a byte at a time
#define COPY_MINUS_N(to,from,n,l,failed) do { \
uint8_t uflow = 0; \
size_t i; \
for (i = 0; i < (l); ++i) { \
(to)[i] = (from)[i] - (n); \
uflow |= (uint8_t) (to)[i]; \
Expand Down Expand Up @@ -4401,6 +4402,50 @@ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str)
return sam_format1_append(h, b, str);
}

void sam_format_seq(char *dest, const uint8_t *seq, size_t seqlen)
{
nibble2base(seq, dest, seqlen);
}

void sam_format_qual(char *dest, const uint8_t *qual, size_t quallen)
{
if (qual[0] != 0xff) {
add33((uint8_t *) dest, qual, quallen);
}
else {
memset(dest, '\0', quallen);
dest[0] = '*';
}
}

int sam_parse_seq(char *seq, const uint8_t *src, size_t len)
{
if (src[0] == '*' && (len == 1 || src[1] == '\0')) return 0;

char *seq0 = seq;
size_t i, even_len = len & ~1;
for (i = 0; i < even_len; i += 2)
*seq++ = (seq_nt16_table[src[i]] << 4) | seq_nt16_table[src[i + 1]];
for (; i < len; i++)
*seq++ = seq_nt16_table[src[i]] << 4;

return seq - seq0;
}

int sam_parse_qual(uint8_t *qual, const char *src, size_t len)
{
if (src[0] == '*' && (len == 1 || src[1] == '\0')) {
memset(qual, 0xff, len);
}
else {
int failed = 0;
COPY_MINUS_N(qual, src, 33, len, failed);
if (failed) { errno = EINVAL; return -1; }
}

return len;
}

static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end);
int fastq_format1(fastq_state *x, const bam1_t *b, kstring_t *str)
{
Expand Down
6 changes: 3 additions & 3 deletions sam_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) {
* for (i = 0; i < len; i++)
* seq[i] = seq_nt16_str[bam_seqi(nib, i)];
*/
static inline void nibble2base_default(uint8_t *nib, char *seq, int len) {
static inline void nibble2base_default(const uint8_t *nib, char *seq, int len) {
static const char code2base[512] =
"===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N"
"A=AAACAMAGARASAVATAWAYAHAKADABAN"
Expand Down Expand Up @@ -108,9 +108,9 @@ static inline void nibble2base_default(uint8_t *nib, char *seq, int len) {
#define BUILDING_SIMD_NIBBLE2BASE
#endif

static inline void nibble2base(uint8_t *nib, char *seq, int len) {
static inline void nibble2base(const uint8_t *nib, char *seq, int len) {
#ifdef BUILDING_SIMD_NIBBLE2BASE
extern void (*htslib_nibble2base)(uint8_t *nib, char *seq, int len);
extern void (*htslib_nibble2base)(const uint8_t *nib, char *seq, int len);
htslib_nibble2base(nib, seq, len);
#else
nibble2base_default(nib, seq, len);
Expand Down
8 changes: 4 additions & 4 deletions simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static inline int cpu_supports_neon(void) {

#ifdef BUILDING_SIMD_NIBBLE2BASE

void (*htslib_nibble2base)(uint8_t *nib, char *seq, int len) = nibble2base_default;
void (*htslib_nibble2base)(const uint8_t *nib, char *seq, int len) = nibble2base_default;

#if defined __x86_64__

Expand All @@ -130,10 +130,10 @@ void (*htslib_nibble2base)(uint8_t *nib, char *seq, int len) = nibble2base_defau
*/

__attribute__((target("ssse3")))
static void nibble2base_ssse3(uint8_t *nib, char *seq, int len) {
static void nibble2base_ssse3(const uint8_t *nib, char *seq, int len) {
const char *seq_end_ptr = seq + len;
char *seq_cursor = seq;
uint8_t *nibble_cursor = nib;
const uint8_t *nibble_cursor = nib;
const char *seq_vec_end_ptr = seq_end_ptr - (2 * sizeof(__m128i) - 1);
__m128i nuc_lookup_vec = _mm_lddqu_si128((__m128i *)seq_nt16_str);
/* Nucleotides are encoded 4-bits per nucleotide and stored in 8-bit bytes
Expand Down Expand Up @@ -170,7 +170,7 @@ static void nibble2base_resolve(void) {

#elif defined __ARM_NEON

static void nibble2base_neon(uint8_t *nib, char *seq0, int len) {
static void nibble2base_neon(const uint8_t *nib, char *seq0, int len) {
uint8x16_t low_nibbles_mask = vdupq_n_u8(0x0f);
uint8x16_t nuc_lookup_vec = vld1q_u8((const uint8_t *) seq_nt16_str);
#ifndef __aarch64__
Expand Down
2 changes: 1 addition & 1 deletion test/test_nibbles.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ char *fmttime(long long elapsed) {
return buf;
}

void nibble2base_single(uint8_t *nib, char *seq, int len) {
void nibble2base_single(const uint8_t *nib, char *seq, int len) {
int i;
for (i = 0; i < len; i++)
seq[i] = seq_nt16_str[bam_seqi(nib, i)];
Expand Down