From d6667e78bbde00232ff25d3b6f16964cc7639378 Mon Sep 17 00:00:00 2001 From: ruanjue Date: Fri, 13 Sep 2019 15:40:02 +0800 Subject: [PATCH] fixed a bug of integer overflow when assembling more than 4G BINs --- kbm.c | 22 +++---- kbm.h | 175 ++++++++++++++++++++------------------------------ kbmpoa.h | 8 +-- wtdbg-graph.h | 28 +++----- wtdbg.c | 4 +- wtdbg.h | 29 ++++----- 6 files changed, 109 insertions(+), 157 deletions(-) diff --git a/kbm.c b/kbm.c index 63af8fd..4388a0b 100644 --- a/kbm.c +++ b/kbm.c @@ -53,7 +53,6 @@ int kbm_usage(){ fprintf(stdout, " if K >= 1, take the integer value as cutoff, MUST <= 65535\n"); fprintf(stdout, " else, mask the top fraction part high frequency kmers\n"); fprintf(stdout, " -E Min kmer frequency, [1]\n"); - fprintf(stdout, " -F Filter low frequency kmers by a 4G-bytes array (max_occ=3 2-bits). Here, -E must greater than 1\n"); fprintf(stdout, " -O Filter low complexity bins (#indexed_kmer less than <-O>), [2]\n"); fprintf(stdout, " -S Subsampling kmers, 1/(<-S>) kmers are indexed, [4.00]\n"); fprintf(stdout, " -S is very useful in saving memeory and speeding up\n"); @@ -161,7 +160,7 @@ if(maln->corr_mode){ for(i=0;isize;i++){ tidx = get_u4v(tidxs, i); rd = ref_kbmreadv(maln->aux->kbm->reads, tidx); - query_index_kbm(raux, rd->tag, tidx, maln->aux->kbm->rdseqs, rd->rdoff, rd->rdlen); + query_index_kbm(raux, rd->tag, tidx, maln->aux->kbm->rdseqs, rd->seqoff * KBM_BIN_SIZE, rd->bincnt * KBM_BIN_SIZE); map_kbm(raux); for(j=0;jhits->size;j++){ flip_hit_kbmaux(maln->aux, raux, j); @@ -249,7 +248,7 @@ int kbm_main(int argc, char **argv){ server = 0; tree_maxcnt = 10; opt_flags = 0; - while((c = getopt(argc, argv, "hi:d:o:fIt:k:p:K:E:FO:S:B:G:D:X:Y:Z:x:y:z:l:m:n:s:cr:CT:W:R:qvV")) != -1){ + while((c = getopt(argc, argv, "hi:d:o:fIt:k:p:K:E:O:S:B:G:D:X:Y:Z:x:y:z:l:m:n:s:cr:CT:W:R:qvV")) != -1){ switch(c){ case 'h': return kbm_usage(); case 'i': push_cplist(qrys, optarg); break; @@ -263,7 +262,6 @@ int kbm_main(int argc, char **argv){ case 'p': par->psize = atoi(optarg); opt_flags |= (1 << 0); break; case 'K': fval = atof(optarg); par->kmax = fval; par->ktop = fval - par->kmax; break; case 'E': par->kmin = atoi(optarg); break; - case 'F': par->use_kf = 1; break; case 'O': par->min_bin_degree = atoi(optarg); break; case 'S': par->kmer_mod = UInt(atof(optarg) * KBM_N_HASH); opt_flags |= (1 << 2); break; case 'B': par->ksampling = atoi(optarg); break; @@ -569,7 +567,7 @@ int kbm_main(int argc, char **argv){ nhit += aux->hits->size; } } - trunc_string(seq->seq, cvt_kbm_read_length(seq->seq->size, KBM_BIN_SIZE)); + trunc_string(seq->seq, kbm_cvt_length(seq->seq->size)); clear_basebank(maln->rdseqs); seq2basebank(maln->rdseqs, seq->seq->string, seq->seq->size); clear_string(maln->rdtag); @@ -593,8 +591,8 @@ int kbm_main(int argc, char **argv){ if(qidx == MAX_U4 || tidx == MAX_U4) continue; if(qidx > tidx){ swap_var(qidx, tidx); } maln->qidx = qidx; - maln->rdoff = kbm->reads->buffer[qidx].rdoff; - maln->rdlen = kbm->reads->buffer[qidx].rdlen; + maln->rdoff = kbm->reads->buffer[qidx].seqoff * KBM_BIN_SIZE; + maln->rdlen = kbm->reads->buffer[qidx].bincnt * KBM_BIN_SIZE; aux->bmin = kbm->reads->buffer[tidx].binoff; aux->bmax = kbm->reads->buffer[tidx].binoff + kbm->reads->buffer[tidx].bincnt; fprintf(out, "%s <-> %s\n", kbm->reads->buffer[qidx].tag, kbm->reads->buffer[tidx].tag); @@ -619,8 +617,8 @@ int kbm_main(int argc, char **argv){ if(nc < 1) continue; qidx = getval_cuhash(kbm->tag2idx, get_col_str(fr, 0)); if(qidx == MAX_U4) continue; - print_exists_index_kbm(kbm, kbm->reads->buffer[qidx].tag, kbm->rdseqs, kbm->reads->buffer[qidx].rdoff, - kbm->reads->buffer[qidx].rdlen, kmers, stdout); + print_exists_index_kbm(kbm, kbm->reads->buffer[qidx].tag, kbm->rdseqs, kbm->reads->buffer[qidx].seqoff * KBM_BIN_SIZE, + kbm->reads->buffer[qidx].bincnt * KBM_BIN_SIZE, kmers, stdout); } free_kmeroffv(kmers[0]); free_kmeroffv(kmers[1]); @@ -635,7 +633,7 @@ int kbm_main(int argc, char **argv){ continue; } fprintf(out, ">%s\n", kbm->reads->buffer[qidx].tag); - println_fwdseq_basebank(kbm->rdseqs, kbm->reads->buffer[qidx].rdoff, kbm->reads->buffer[qidx].rdlen, out); + println_fwdseq_basebank(kbm->rdseqs, kbm->reads->buffer[qidx].seqoff * KBM_BIN_SIZE, kbm->reads->buffer[qidx].bincnt * KBM_BIN_SIZE, out); fflush(out); } } @@ -687,8 +685,8 @@ int kbm_main(int argc, char **argv){ fprintf(KBM_LOGF, "\r%u\t%llu", qidx, nhit); fflush(KBM_LOGF); } maln->qidx = qidx; - maln->rdoff = kbm->reads->buffer[qidx].rdoff; - maln->rdlen = kbm->reads->buffer[qidx].rdlen; + maln->rdoff = kbm->reads->buffer[qidx].seqoff * KBM_BIN_SIZE; + maln->rdlen = kbm->reads->buffer[qidx].bincnt * KBM_BIN_SIZE; thread_wake(maln); } fprintf(KBM_LOGF, "\r%u\t%llu\n", qidx, nhit); fflush(KBM_LOGF); diff --git a/kbm.h b/kbm.h index ffe335d..0730eef 100644 --- a/kbm.h +++ b/kbm.h @@ -33,22 +33,17 @@ //#define __DEBUG__ 1 #define TEST_MODE -#define KBM_BIN_SIZE 256 +#define KBM_BIN_BITS 8 +#define KBM_BSIZE 256 +#define KBM_BIN_SIZE KBM_BSIZE #define KBM_MAX_BINCNT 0xFFFFFFFFFFLLU // 40 bits, 1024 G #define KBM_MAX_RDCNT 0x3FFFFFFF // 30 bits, 1 G -#define KBM_MAX_RDBINCNT 0xFFFFFF // 24 bits +#define KBM_MAX_RDBINCNT 0x7FFFFF // 23 bits // able to index reference sequences -#define KBM_MAX_RDLEN 0xFFFFFFFFU // 32 bits, 4 G bp - +#define KBM_MAX_RDLEN 0x7FFFFFFFU // 31 bits, 2 G bp #define KBM_MAX_KSIZE 23 #define KBM_MAX_KCNT 0xFFFF // 16 bits, 65535 - #define KBM_N_HASH 4096 - -#define KBM_KF_BITS 32 -#define KBM_KF_SIZE (1LLU << KBM_KF_BITS) -#define KBM_KF_MASK (KBM_KF_SIZE - 1LLU) - #define KBM_LOGF stderr #define KBM_LOGFNO STDERR_FILENO static int KBM_LOG = 0; @@ -56,29 +51,24 @@ static int KBM_LOG = 0; #define KBM_LOG_MID 2 #define KBM_LOG_HIG 3 #define KBM_LOG_ALL 4 - #define KBM_MAX_RDGRP1 0x7FFFFF #define KBM_MAX_RDGRP2 0xFF typedef struct { - u8i rdoff:40, bincnt:24; - u4i rdlen, binoff; + u8i seqoff:40, flag:24; // seqoff is counted in BIN(256bp) + u8i binoff:40, bincnt:23, closed:1; char *tag; } kbm_read_t; define_list(kbmreadv, kbm_read_t); const obj_desc_t kbm_read_t_obj_desc = {"kbm_read_t_obj_desc", sizeof(kbm_read_t), 1, {1}, {offsetof(kbm_read_t, tag)}, {&OBJ_DESC_CHAR_ARRAY}, NULL, NULL}; static inline size_t kbmreadv_deep_obj_desc_cnt(void *list, int idx){ if(idx == 0) return ((kbmreadv*)list)->size; else return 0; } static const obj_desc_t kbmreadv_deep_obj_desc = {.tag = "kbmreadv_deep_obj_desc", .size = sizeof(kbmreadv), .n_child = 1, .mem_type = {1}, .addr = {offsetof(kbmreadv, buffer)}, .desc = {&kbm_read_t_obj_desc}, .cnt = kbmreadv_deep_obj_desc_cnt, .post = NULL}; +#define kbm_read_seqoff(rd) ((rd)->seqoff << KBM_BIN_BITS) +#define kbm_read_seqlen(rd) ((rd)->bincnt << KBM_BIN_BITS) -#if 0 -#define KBM_MAX_BIN_DEGREE 0x7FFU -#endif #define KBM_MAX_BIN_DEGREE 0x1FFU // each BIN takes KBM_BIN_SIZE bp in uncompressed reads typedef struct { -#if 0 - u4i ridx:28, off:24, closed:1, degree:11; // off * KBM_BIN_SIZE is the real position -#endif u4i ridx:30, off:24, closed:1, degree:9; // off * KBM_BIN_SIZE is the real position } kbm_bin_t; define_list(kbmbinv, kbm_bin_t); @@ -166,12 +156,9 @@ define_list(kbmmapv, kbm_map_t); typedef struct { int rd_len_order; // 0 - //int hk; // 0 - int use_kf; // 0 int min_bin_degree; // 2 u4i ksize, psize; // 0, 21 - u4i kbsize; // 256, MUST <= 256 - u4i kmax, kmin, kmer_mod, ksampling; // 1000, 1, 4.0 * KBM_N_HASH, KBM_BIN_SIZE + u4i kmax, kmin, kmer_mod, ksampling; // 1000, 1, 4.0 * KBM_N_HASH, KBM_BSIZE float ktop; // 0.05 // runtime u4i strand_mask; // 3. 1: forward; 2: reverse; 3: both @@ -195,11 +182,12 @@ typedef struct { u8i flags; // 64 bits, 0: mem_load all, 1: mem_load rdseqs+reads; 2: Single Hash Mode;3-63: unused KBMPar *par; BaseBank *rdseqs; + u8i avail_bases; + u4i avail_reads; kbmreadv *reads; cuhash *tag2idx; kbmbinv *bins; BitVec *binmarks; - //u8i *kfs; kbmbmerv *seeds; kbmbauxv *sauxs; kbmhash *hashs[KBM_N_HASH]; @@ -282,15 +270,13 @@ static inline KBMPar* init_kbmpar(){ KBMPar *par; par = malloc(sizeof(KBMPar)); par->rd_len_order = 0; - par->use_kf = 0; par->min_bin_degree = 2; par->ksize = 0; par->psize = 21; - par->kbsize = 256; par->kmax = 1000; par->kmin = 1; par->kmer_mod = 4 * KBM_N_HASH; - par->ksampling = par->kbsize; + par->ksampling = KBM_BSIZE; par->ktop = 0.05; par->strand_mask = 3; par->self_aln = 0; @@ -302,7 +288,7 @@ static inline KBMPar* init_kbmpar(){ par->pgap = -7; par->pvar = -21; par->max_hit = 1000; - par->min_aln = 2048 / par->kbsize; + par->min_aln = 2048 / KBM_BIN_SIZE; par->min_mat = 200; par->min_sim = 0.05; par->aln_var = 0.25; @@ -319,6 +305,8 @@ static inline KBM* init_kbm(KBMPar *par){ kbm->flags = 0; kbm->par = par; kbm->rdseqs = init_basebank(); + kbm->avail_bases = 0; + kbm->avail_reads = 0; kbm->reads = init_kbmreadv(64); kbm->tag2idx = init_cuhash(1023); kbm->bins = init_kbmbinv(64); @@ -400,11 +388,7 @@ static inline void clear_kbm(KBM *kbm){ clear_kbmbauxv(kbm->sauxs); } -static inline int cvt_kbm_read_length(u4i seqlen, u4i kbsize){ - if(seqlen > KBM_MAX_RDLEN) seqlen = KBM_MAX_RDLEN; - seqlen = (seqlen / kbsize) * kbsize; - return seqlen; -} +#define kbm_cvt_length(seqlen) ((seqlen) & (MAX_U8 << KBM_BIN_BITS)) static inline void push_kbm(KBM *kbm, char *tag, int taglen, char *seq, u4i seqlen){ kbm_read_t *rd; @@ -416,18 +400,16 @@ static inline void push_kbm(KBM *kbm, char *tag, int taglen, char *seq, u4i seql } else { ptr = NULL; } - seqlen = cvt_kbm_read_length(seqlen, kbm->par->kbsize); + if(seqlen > KBM_MAX_RDLEN) seqlen = KBM_MAX_RDLEN; + seqlen = kbm_cvt_length(seqlen); rd = next_ref_kbmreadv(kbm->reads); - rd->rdoff = kbm->rdseqs->size; - rd->rdlen = seqlen; + rd->seqoff = (kbm->rdseqs->size >> KBM_BIN_BITS); + rd->flag = 0; rd->binoff = 0; - rd->bincnt = 0; + rd->bincnt = seqlen / KBM_BIN_SIZE; + rd->closed = 0; rd->tag = ptr; seq2basebank(kbm->rdseqs, seq, seqlen); - // make sure rdoff is even - if(kbm->rdseqs->size & 0x1){ - bit2basebank(kbm->rdseqs, 0); - } } static inline void bitpush_kbm(KBM *kbm, char *tag, int taglen, u8i *seqs, u8i seqoff, u4i seqlen){ @@ -440,18 +422,16 @@ static inline void bitpush_kbm(KBM *kbm, char *tag, int taglen, u8i *seqs, u8i s } else { ptr = NULL; } - seqlen = cvt_kbm_read_length(seqlen, kbm->par->kbsize); + if(seqlen > KBM_MAX_RDLEN) seqlen = KBM_MAX_RDLEN; + seqlen = kbm_cvt_length(seqlen); rd = next_ref_kbmreadv(kbm->reads); - rd->rdoff = kbm->rdseqs->size; - rd->rdlen = seqlen; + rd->seqoff = (kbm->rdseqs->size >> KBM_BIN_BITS); + rd->flag = 0; rd->binoff = 0; - rd->bincnt = 0; + rd->bincnt = seqlen / KBM_BIN_SIZE; + rd->closed = 0; rd->tag = ptr; fast_fwdbits2basebank(kbm->rdseqs, seqs, seqoff, seqlen); - // make sure rdoff is even - if(kbm->rdseqs->size & 0x1){ - bit2basebank(kbm->rdseqs, 0); - } } // Please call no more than once @@ -461,21 +441,21 @@ static inline u8i filter_reads_kbm(KBM *kbm, u8i retain_size, int strategy){ if(retain_size == 0 || retain_size >= kbm->rdseqs->size) return kbm->rdseqs->size; if((kbm->flags & 0x2) == 0){ if(kbm->par->rd_len_order){ - sort_array(kbm->reads->buffer, kbm->reads->size, kbm_read_t, num_cmpgt(b.rdlen, a.rdlen)); + sort_array(kbm->reads->buffer, kbm->reads->size, kbm_read_t, num_cmpgt(b.bincnt, a.bincnt)); if(strategy == 0){ // longest len = 0; for(e=0;ereads->size;e++){ - len += kbm->reads->buffer[e].rdlen; + len += kbm->reads->buffer[e].bincnt * KBM_BIN_SIZE; if(len >= retain_size) break; } kbm->reads->size = e; } else if(strategy == 1){ // median m = kbm->reads->size / 2; - len = kbm->reads->buffer[m].rdlen; + len = kbm->reads->buffer[m].bincnt * KBM_BIN_SIZE; e = m; for(b=0;b<=m&&lenreads->buffer[m - b].rdlen; - len += kbm->reads->buffer[m + b].rdlen; + len += kbm->reads->buffer[m - b].bincnt * KBM_BIN_SIZE; + len += kbm->reads->buffer[m + b].bincnt * KBM_BIN_SIZE; } e = b * 2; b = m - b; @@ -500,19 +480,19 @@ static inline void ready_kbm(KBM *kbm){ u4i i, j; if((kbm->flags & 0x2) == 0){ if(kbm->par->rd_len_order){ - sort_array(kbm->reads->buffer, kbm->reads->size, kbm_read_t, num_cmpgt(b.rdlen, a.rdlen)); + sort_array(kbm->reads->buffer, kbm->reads->size, kbm_read_t, num_cmpgt(b.bincnt, a.bincnt)); } - encap_basebank(kbm->rdseqs, kbm->par->kbsize); + encap_basebank(kbm->rdseqs, KBM_BSIZE); } clear_kbmbinv(kbm->bins); for(i=0;ireads->size;i++){ rd = ref_kbmreadv(kbm->reads, i); if(rd->tag) put_cuhash(kbm->tag2idx, (cuhash_t){rd->tag, i}); if((kbm->flags & 0x2) == 0) rd->binoff = kbm->bins->size; - for(j=0;j+kbm->par->kbsize<=rd->rdlen;j+=kbm->par->kbsize){ - push_kbmbinv(kbm->bins, (kbm_bin_t){i, j / kbm->par->kbsize, 0, 0}); + for(j=0;jbincnt;j++){ + push_kbmbinv(kbm->bins, (kbm_bin_t){i, j, 0, 0}); } - if((kbm->flags & 0x2) == 0) rd->bincnt = j / kbm->par->kbsize; + if((kbm->flags & 0x2) == 0) rd->bincnt = j; } clear_bitvec(kbm->binmarks); encap_bitvec(kbm->binmarks, kbm->bins->size); @@ -654,7 +634,7 @@ static inline u8i seed2solid_idx_kbm(KBM *kbm, kbm_dpe_t *p){ u8i seqoff; b = kbm->bins->buffer + p->bidx; rd = kbm->reads->buffer + b->ridx; - seqoff = ((rd->rdoff + b->off * kbm->par->kbsize) >> 1) + p->koff; + seqoff = (((rd->seqoff + b->off) * KBM_BSIZE) >> 1) + p->koff; return seqoff; } @@ -662,7 +642,7 @@ static inline u8i rdoff2solid_idx_kbm(KBM *kbm, u4i ridx, u4i roff){ kbm_read_t *rd; u8i seqoff; rd = kbm->reads->buffer + ridx; - seqoff = (rd->rdoff + roff) >> 1; + seqoff = (rd->seqoff * KBM_BIN_SIZE + roff) >> 1; return seqoff; } @@ -683,7 +663,7 @@ define_list(tmpbmerv, kbm_tmp_bmer_t); thread_beg_def(midx); KBM *kbm; -u4i beg, end; // (end - beg) * KBM_BIN_SIZE MUST <= KBM_KMEROFF_MAX +u8i beg, end; // (end - beg) * KBM_BSIZE MUST <= KBM_KMEROFF_MAX u8i ktot, nrem, Nrem, none, nflt, offset; u8i srem, Srem; u8i *cnts, n_cnt; @@ -702,8 +682,8 @@ kbm_kmer_t *u; kbm_kaux_t *x; kmeroffv *kmers[2]; tmpbmerv *bms; -u8i off; -u4i bidx, i, j, k, len, ncpu, tidx, kidx; +u8i bidx, off; +u4i i, j, k, len, ncpu, tidx, kidx; int exists; kbm = midx->kbm; ncpu = midx->n_cpu; @@ -718,12 +698,11 @@ if(midx->task == 1){ // counting kmers for(i=0;ibeg+tidx;bidxend;bidx+=ncpu){ - if(KBM_LOG == 0 && tidx == 0 && ((bidx - midx->beg) % 100000) == 0){ fprintf(KBM_LOGF, "\r%u", bidx - midx->beg); fflush(KBM_LOGF); } + if(KBM_LOG == 0 && tidx == 0 && ((bidx - midx->beg) % 100000) == 0){ fprintf(KBM_LOGF, "\r%llu", bidx - midx->beg); fflush(KBM_LOGF); } bin = ref_kbmbinv(kbm->bins, bidx); if(bin->closed) continue; - if(((u4i)bin->off + 1) * kbm->par->kbsize > kbm->reads->buffer[bin->ridx].rdlen) continue; - off = kbm->reads->buffer[bin->ridx].rdoff + bin->off * kbm->par->kbsize; - len = kbm->par->kbsize; + off = (kbm->reads->buffer[bin->ridx].seqoff + bin->off) * KBM_BIN_SIZE; + len = KBM_BIN_SIZE; split_FIXP_kmers_kbm(kbm->rdseqs, off, len, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); for(i=0;i<2;i++){ for(j=0;jsize;j++){ @@ -826,15 +805,14 @@ if(midx->task == 1){ // fill seeds for(i=0;ipar->kbsize); + chgs = init_u4v(KBM_BSIZE); for(bidx=midx->beg+tidx;bidxend;bidx+=ncpu){ - if(KBM_LOG == 0 && tidx == 0 && ((bidx - midx->beg) % 100000) == 0){ fprintf(KBM_LOGF, "\r%u", bidx - midx->beg); fflush(KBM_LOGF); } + if(KBM_LOG == 0 && tidx == 0 && ((bidx - midx->beg) % 100000) == 0){ fprintf(KBM_LOGF, "\r%llu", bidx - midx->beg); fflush(KBM_LOGF); } bin = ref_kbmbinv(kbm->bins, bidx); if(bin->closed) continue; bin->degree = 0; - if(((u4i)bin->off + 1) * kbm->par->kbsize > kbm->reads->buffer[bin->ridx].rdlen) continue; - off = kbm->reads->buffer[bin->ridx].rdoff + bin->off * kbm->par->kbsize; - len = kbm->par->kbsize; + off = (kbm->reads->buffer[bin->ridx].seqoff + bin->off) * KBM_BIN_SIZE; + len = KBM_BIN_SIZE; split_FIXP_kmers_kbm(kbm->rdseqs, off, len, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); clear_u4v(chgs); for(i=0;i<2;i++){ @@ -970,12 +948,6 @@ static inline void index_kbm(KBM *kbm, u8i beg, u8i end, u4i ncpu, FILE *kmstat) for(i=0;ihashs[i]); kcnts = NULL; MAX = KBM_MAX_KCNT; - //if(kbm->kfs){ - //free(kbm->kfs); - //kbm->kfs = NULL; - //} - //if(kbm->par->kmin <= 1) kbm->par->use_kf = 0; - //kbm->kfs = kbm->par->use_kf? calloc(KBM_KF_SIZE / 4 / 8, 8) : NULL; hash_locks = calloc(KBM_N_HASH, sizeof(pthread_mutex_t)); thread_beg_init(midx, ncpu); midx->kbm = kbm; @@ -1143,9 +1115,7 @@ static inline void index_kbm(KBM *kbm, u8i beg, u8i end, u4i ncpu, FILE *kmstat) thread_beg_close(midx); if(midx->cnts) free(midx->cnts); thread_end_close(midx); - if(1){ - free(hash_locks); - } + free(hash_locks); print_proc_stat_info(0); } @@ -1168,9 +1138,8 @@ static inline void simple_index_kbm(KBM *kbm, u8i beg, u8i end){ for(bidx=beg;bidxbins, bidx); if(bin->closed) continue; - if(((u4i)bin->off + 1) * kbm->par->kbsize > kbm->reads->buffer[bin->ridx].rdlen) continue; - off = kbm->reads->buffer[bin->ridx].rdoff + bin->off * kbm->par->kbsize; - split_FIXP_kmers_kbm(kbm->rdseqs, off, kbm->par->kbsize, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); + off = (kbm->reads->buffer[bin->ridx].seqoff + bin->off) * KBM_BIN_SIZE; + split_FIXP_kmers_kbm(kbm->rdseqs, off, KBM_BIN_SIZE, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); for(i=0;i<2;i++){ for(j=0;jsize;j++){ f = ref_kmeroffv(kmers[i], j); @@ -1229,9 +1198,8 @@ static inline void simple_index_kbm(KBM *kbm, u8i beg, u8i end){ for(bidx=beg;bidxbins, bidx); if(bin->closed) continue; - if(((u4i)bin->off + 1) * kbm->par->kbsize > kbm->reads->buffer[bin->ridx].rdlen) continue; - off = kbm->reads->buffer[bin->ridx].rdoff + bin->off * kbm->par->kbsize; - split_FIXP_kmers_kbm(kbm->rdseqs, off, kbm->par->kbsize, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); + off = (kbm->reads->buffer[bin->ridx].seqoff + bin->off) * KBM_BIN_SIZE; + split_FIXP_kmers_kbm(kbm->rdseqs, off, KBM_BIN_SIZE, kbm->par->ksize, kbm->par->psize, kbm->par->kmer_mod, kmers); for(i=0;i<2;i++){ for(j=0;jsize;j++){ f = ref_kmeroffv(kmers[i], j); @@ -1429,9 +1397,9 @@ static inline void query_index_kbm(KBMAux *aux, char *qtag, u4i qidx, BaseBank * aux->qseqs = rdseqs; aux->qsoff = seqoff; aux->qlen = seqlen; - aux->slen = (seqlen / kbm->par->kbsize) * kbm->par->kbsize; + aux->slen = (seqlen / KBM_BIN_SIZE) * KBM_BIN_SIZE; aux->qidx = qidx; - aux->qnbin = aux->slen / kbm->par->kbsize; + aux->qnbin = aux->slen / KBM_BIN_SIZE; aux->qnbit = (aux->qnbin + 63) & 0xFFFFFFC0U; clear_kbmdp(aux->dps[0], aux, 0); clear_kbmdp(aux->dps[1], aux, 0); @@ -1473,12 +1441,12 @@ static inline void query_index_kbm(KBMAux *aux, char *qtag, u4i qidx, BaseBank * ref->off = f->off; ref->dir = i; if(par->self_aln && aux->solids){ - sidx = (kbm->reads->buffer[qidx].rdoff + ref->off) >> 1; + sidx = (kbm->reads->buffer[qidx].seqoff * KBM_BIN_SIZE + ref->off) >> 1; ref->fine = get_bitvec(aux->solids, sidx); } else { ref->fine = 0; } - ref->qbidx = ref->off / kbm->par->kbsize; + ref->qbidx = ref->off / KBM_BIN_SIZE; ref->poffs[0] = ref->off; ref->poffs[1] = aux->slen - (ref->off + (aux->par->ksize + aux->par->psize)); ref->boff = x->off; @@ -1546,7 +1514,7 @@ static inline void query_index_kbm(KBMAux *aux, char *qtag, u4i qidx, BaseBank * ref->closed = 1; } } - } else if(aux->par->ksampling < kbm->par->kbsize && aux->refs->size){ + } else if(aux->par->ksampling < KBM_BIN_SIZE && aux->refs->size){ sort_array(aux->refs->buffer, aux->refs->size, kbm_ref_t, num_cmpgtx(a.qbidx, b.qbidx, b.bend - b.boff, a.bend - a.boff)); tot = 0; for(i=j=0;irefs->size;i++){ @@ -1863,7 +1831,7 @@ static inline int _backtrace_map_kbm(KBMAux *aux, int dir, kbm_path_t *p){ push_bitsvec(aux->cigars, lst); } cglen = aux->cigars->size - cgoff; - if(mat < (u4i)aux->par->min_mat || mat < UInt(hit->aln * aux->kbm->par->kbsize * aux->par->min_sim) + if(mat < (u4i)aux->par->min_mat || mat < UInt(hit->aln * KBM_BSIZE * aux->par->min_sim) || gap > (u4i)(hit->aln * aux->par->max_gap) || hit->aln < (int)aux->par->min_aln || num_diff(hit->qe - hit->qb, hit->te - hit->tb) > (int)num_max(aux->par->aln_var * hit->aln, 1.0)){ @@ -1900,8 +1868,6 @@ static inline int _backtrace_map_kbm(KBMAux *aux, int dir, kbm_path_t *p){ hit->gap = gap; hit->cgoff = cgoff; hit->cglen = cglen; - //if(hit->qe > (int)aux->qlen) hit->qe = aux->qlen; - //if(hit->te > (int)aux->kbm->reads->buffer[hit->tidx].rdlen) hit->te = aux->kbm->reads->buffer[hit->tidx].rdlen; if(dir){ tmp = aux->qnbin - hit->qb; hit->qb = aux->qnbin - hit->qe; @@ -1937,9 +1903,9 @@ static inline void print_hit_kbm(KBM *kbm, char *qtag, u4i qlen, kbm_map_t *hit, u8i coff; u4i clen, len, bt, _bt; if(hit->mat == 0) return; - fprintf(out, "%s\t%c\t%d\t%d\t%d", qtag, "+-"[hit->qdir], qlen, hit->qb * kbm->par->kbsize, hit->qe * kbm->par->kbsize); - fprintf(out, "\t%s\t%c\t%d\t%d\t%d", kbm->reads->buffer[hit->tidx].tag, "+-"[hit->tdir], kbm->reads->buffer[hit->tidx].rdlen, hit->tb * kbm->par->kbsize, hit->te * kbm->par->kbsize); - fprintf(out, "\t%d\t%d\t%d\t%d\t", hit->mat, hit->aln * kbm->par->kbsize, hit->cnt, hit->gap); + fprintf(out, "%s\t%c\t%d\t%d\t%d", qtag, "+-"[hit->qdir], qlen, hit->qb * KBM_BIN_SIZE, hit->qe * KBM_BIN_SIZE); + fprintf(out, "\t%s\t%c\t%d\t%d\t%d", kbm->reads->buffer[hit->tidx].tag, "+-"[hit->tdir], kbm->reads->buffer[hit->tidx].bincnt * KBM_BIN_SIZE, hit->tb * KBM_BIN_SIZE, hit->te * KBM_BIN_SIZE); + fprintf(out, "\t%d\t%d\t%d\t%d\t", hit->mat, hit->aln * KBM_BIN_SIZE, hit->cnt, hit->gap); if(cigars){ str = _str? _str : init_string(64); bt = len = 0; @@ -2050,7 +2016,8 @@ static inline void push_kmer_match_kbm(KBMAux *aux, int dir, kbm_dpe_t *p){ KBMDP *dp; kbm_cmer_t *c; kbm_dpe_t *e, E; - u4i i, qb, bb, kmat, kcnt, blen; + u8i bb; + u4i i, qb, kmat, kcnt, blen; kbm = aux->kbm; dp = aux->dps[dir]; if(p == NULL){ @@ -2126,7 +2093,7 @@ static inline void push_kmer_match_kbm(KBMAux *aux, int dir, kbm_dpe_t *p){ E.poff = 0; for(i=0;i<=dp->kms->size;i++){ e = (i < dp->kms->size)? ref_kbmdpev(dp->kms, i) : &E; - if(e->bidx == bb && e->poff / kbm->par->kbsize == qb / kbm->par->kbsize){ + if(e->bidx == bb && e->poff / KBM_BIN_SIZE == qb / KBM_BIN_SIZE){ kcnt ++; if(qb + aux->par->ksize + aux->par->psize >= e->poff){ kmat += e->poff - qb; @@ -2134,7 +2101,7 @@ static inline void push_kmer_match_kbm(KBMAux *aux, int dir, kbm_dpe_t *p){ kmat += aux->par->ksize + aux->par->psize; } } else { - one_bitvec(dp->cmask, (bb - dp->boff) * aux->qnbit + qb / kbm->par->kbsize); + one_bitvec(dp->cmask, (bb - dp->boff) * aux->qnbit + qb / KBM_BIN_SIZE); c = next_ref_kbmcmerv(dp->cms); c->koff = i - kcnt; c->kcnt = kcnt; @@ -2142,7 +2109,7 @@ static inline void push_kmer_match_kbm(KBMAux *aux, int dir, kbm_dpe_t *p){ c->boff = bb - dp->boff; #if __DEBUG__ if(KBM_LOG >= KBM_LOG_ALL){ - fprintf(KBM_LOGF, "KBMLOG%d [x=%d, y=%d(%s:%d), off=%d cnt=%d, mat=%d]\n", __LINE__, qb / kbm->par->kbsize, bb, + fprintf(KBM_LOGF, "KBMLOG%d [x=%d, y=%d(%s:%d), off=%d cnt=%d, mat=%d]\n", __LINE__, qb / KBM_BIN_SIZE, bb, aux->kbm->reads->buffer[aux->kbm->bins->buffer[bb].ridx].tag, bb - aux->kbm->reads->buffer[aux->kbm->bins->buffer[bb].ridx].binoff, c->koff, c->kcnt, c->kmat); } #endif @@ -2249,7 +2216,7 @@ static inline void map_kbm(KBMAux *aux){ for(j=0;jcaches[i]->size;j++){ #if __DEBUG__ if(KBM_LOG >= KBM_LOG_ALL){ - //fprintf(KBM_LOGF, "KBMLOG%d\t%d\t%d\t%c\t%d\t%llu[%d,%d]\t%llu[%d,%d]\n", __LINE__, aux->qidx, ref->poffs[ref->pdir], "+-"[ref->pdir], aux->hptr, ref->bidx, aux->kbm->bins->buffer[ref->bidx].ridx, aux->kbm->bins->buffer[ref->bidx].off * kbm->par->kbsize, (u8i)e->bidx, aux->kbm->bins->buffer[e->bidx].ridx, e->poff); + //fprintf(KBM_LOGF, "KBMLOG%d\t%d\t%d\t%c\t%d\t%llu[%d,%d]\t%llu[%d,%d]\n", __LINE__, aux->qidx, ref->poffs[ref->pdir], "+-"[ref->pdir], aux->hptr, ref->bidx, aux->kbm->bins->buffer[ref->bidx].ridx, aux->kbm->bins->buffer[ref->bidx].off * KBM_BIN_SIZE, (u8i)e->bidx, aux->kbm->bins->buffer[e->bidx].ridx, e->poff); } #endif push_kmer_match_kbm(aux, i, aux->caches[i]->buffer + j); diff --git a/kbmpoa.h b/kbmpoa.h index 4bb321a..8fd6e5a 100644 --- a/kbmpoa.h +++ b/kbmpoa.h @@ -155,8 +155,8 @@ static inline lay_seq_t* iter_kbmblock(void *obj){ break; } hit = ref_kbmmapv(kb->aux->hits, kb->hidx ++); - tsoff = kb->aux->kbm->reads->buffer[hit->tidx].rdoff; - rdlen = kb->aux->kbm->reads->buffer[hit->tidx].rdlen; + tsoff = kb->aux->kbm->reads->buffer[hit->tidx].seqoff * KBM_BIN_SIZE; + rdlen = kb->aux->kbm->reads->buffer[hit->tidx].bincnt * KBM_BIN_SIZE; off = hit->qb; rdoff = hit->qdir? Int(rdlen - hit->te) : hit->tb; { @@ -338,7 +338,7 @@ static inline int map_kbmpoa(CTGCNS *cc, KBMAux *aux, char *rdtag, u4i qidx, Bas int self_aln, max_hit, min_aln, min_mat; kb = (KBMBlock*)cc->obj; reset_ctgcns(cc, kb, iter_kbmblock, info_kbmblock); - seqlen = cvt_kbm_read_length(seqlen, KBM_BIN_SIZE); + seqlen = kbm_cvt_length(seqlen); if(seqlen < 4 * KBM_BIN_SIZE + UInt(aux->par->min_aln)) return 0; if(rdseq && rdseq != aux->kbm->rdseqs){ self_aln = 0; @@ -396,7 +396,7 @@ static inline int map_kbmpoa(CTGCNS *cc, KBMAux *aux, char *rdtag, u4i qidx, Bas ctg->cns->size = seqlen; normalize_basebank(ctg->cns); } else if(ctg->cns->size < seqlen){ - ctg->cns->size = cvt_kbm_read_length(ctg->cns->size, KBM_BIN_SIZE); + ctg->cns->size = kbm_cvt_length(ctg->cns->size); normalize_basebank(ctg->cns); } if(ctg->cns->size == 0){ diff --git a/wtdbg-graph.h b/wtdbg-graph.h index 7f73709..225ee90 100644 --- a/wtdbg-graph.h +++ b/wtdbg-graph.h @@ -3926,14 +3926,14 @@ static inline int gen_seq_traces_graph(Graph *g, tracev *path, String *seq){ inc = (r2->beg - r1->end) * KBM_BIN_SIZE; if(inc <= 0) break; encap_string(seq, inc); - seq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[r1->rid].rdoff + (r1->end * KBM_BIN_SIZE), inc, seq->string + seq->size); + seq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[r1->rid].binoff * KBM_BIN_SIZE+ (r1->end * KBM_BIN_SIZE), inc, seq->string + seq->size); seq->size += inc; } else { if(!(t1->dir ^ r1->dir)){ r1++; r2++; continue; } inc = (r1->beg - r2->end) * KBM_BIN_SIZE; if(inc <= 0) break; encap_string(seq, inc); - revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[r1->rid].rdoff + (r2->end * KBM_BIN_SIZE), inc, seq->string + seq->size); + revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[r1->rid].seqoff + r2->end) * KBM_BIN_SIZE, inc, seq->string + seq->size); seq->size += inc; } inc = 0; @@ -3953,8 +3953,8 @@ static inline int gen_seq_traces_graph(Graph *g, tracev *path, String *seq){ reg = ref_regv(g->regs, ref_nodev(g->nodes, t1->node)->regs.idx); inc = (reg->end - reg->beg) * KBM_BIN_SIZE; encap_string(seq, inc); - if(t1->dir ^ reg->dir) revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[reg->rid].rdoff + (reg->beg * KBM_BIN_SIZE), inc, seq->string + seq->size); - else seq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[reg->rid].rdoff + (reg->beg * KBM_BIN_SIZE), inc, seq->string + seq->size); + if(t1->dir ^ reg->dir) revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, inc, seq->string + seq->size); + else seq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, inc, seq->string + seq->size); seq->size += inc; } return seq->size; @@ -4441,9 +4441,9 @@ static inline u8i print_ctgs_graph(Graph *g, u8i uid, u8i beg, u8i end, char *pr reg = ref_layregv(regs, lay->roff + c); fprintf(bw->out, "%c\t%s\t%c\t%d\t%d\t", "Ss"[reg->view], g->kbm->reads->buffer[reg->rid].tag, "+-"[reg->dir], reg->beg * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE); if(reg->dir){ - print_revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[reg->rid].rdoff + reg->beg * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out); + print_revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out); } else { - print_seq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[reg->rid].rdoff + reg->beg * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out); + print_seq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out); } fprintf(bw->out, "\n"); } @@ -4503,26 +4503,16 @@ static inline u4i print_traces_graph(Graph *g, tracev *path, FILE *out){ fprintf(out, "S\t%s\t", g->kbm->reads->buffer[rid].tag); fprintf(out, "+\t%d\t%d\t", (int)beg, (int)(end - beg)); encap_string(str, end - beg); - fwdseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].rdoff + beg, end - beg, str->string); + fwdseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].seqoff * KBM_BIN_SIZE + beg, end - beg, str->string); fputs(str->string, out); - //beg += g->kbm->reads->buffer[rid].rdoff; - //end += g->kbm->reads->buffer[rid].rdoff; - //for(j=beg;jkbm->rdseqs->bits, j)], out); - //} } else { if(!(t1->dir ^ r1->dir)){ r1 ++; r2 ++; continue; } beg = r2->beg * KBM_BIN_SIZE; end = r1->end * KBM_BIN_SIZE; fprintf(out, "S\t%s\t", g->kbm->reads->buffer[rid].tag); fprintf(out, "-\t%d\t%d\t", (int)beg, (int)(end - beg)); encap_string(str, end - beg); - revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].rdoff + beg, end - beg, str->string); + revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].seqoff * KBM_BIN_SIZE + beg, end - beg, str->string); fputs(str->string, out); - //beg += g->kbm->reads->buffer[rid].rdoff; - //end += g->kbm->reads->buffer[rid].rdoff; - //for(j=end;j>beg;j--){ - //fputc(bit_base_table[bits2revbit(g->kbm->rdseqs->bits, (j-1))], out); - //} } fputc('\n', out); if(fst){ @@ -4828,7 +4818,7 @@ static inline u8i print_reads_graph(Graph *g, FILE *out){ u4i i; for(i=0;ikbm->reads->size;i++){ rd = ref_readv(g->reads, i); - fprintf(out, "%s\t%d\t%u", g->kbm->reads->buffer[i].tag, g->kbm->reads->buffer[i].rdlen, rd->regs.cnt); + fprintf(out, "%s\t%d\t%u", g->kbm->reads->buffer[i].tag, g->kbm->reads->buffer[i].bincnt * KBM_BIN_SIZE, rd->regs.cnt); idx = rd->regs.idx; while(idx){ r = ref_regv(g->regs, idx); diff --git a/wtdbg.c b/wtdbg.c index 056016c..d883b36 100644 --- a/wtdbg.c +++ b/wtdbg.c @@ -719,7 +719,7 @@ int main(int argc, char **argv){ } nfix = 0; tot_bp = 0; - for(i=0;ireads->size;i++) tot_bp += kbm->reads->buffer[i].rdlen; + for(i=0;ireads->size;i++) tot_bp += kbm->reads->buffer[i].bincnt * KBM_BIN_SIZE; fprintf(KBM_LOGF, "[%s] Done. %u sequences, %llu bp, parameter('-S %d')\n", date(), (u4i)kbm->reads->size, tot_bp, kbm->par->kmer_mod / KBM_N_HASH); { // check KBMPar @@ -762,7 +762,7 @@ int main(int argc, char **argv){ kbm = clone_seqs_kbm(kbm, par); nfix = 0; tot_bp = 0; - for(i=0;ireads->size;i++) tot_bp += kbm->reads->buffer[i].rdlen; + for(i=0;ireads->size;i++) tot_bp += kbm->reads->buffer[i].bincnt * KBM_BIN_SIZE; fprintf(KBM_LOGF, "[%s] Done. %u sequences, %llu bp\n", date(), (u4i)kbm->reads->size, tot_bp); } else { kbm = init_kbm(par); diff --git a/wtdbg.h b/wtdbg.h index 1d0b357..48ff85c 100644 --- a/wtdbg.h +++ b/wtdbg.h @@ -266,7 +266,7 @@ static inline Graph* init_graph(KBM *kbm){ g->reads->size = kbm->reads->size; for(rid=0;ridreads->size;rid++){ g->reads->buffer[rid].clps[0] = 0; - g->reads->buffer[rid].clps[1] = g->kbm->reads->buffer[rid].rdlen / KBM_BIN_SIZE; + g->reads->buffer[rid].clps[1] = g->kbm->reads->buffer[rid].bincnt; } g->nodes = init_nodev(32); g->rdhits = init_rdhitv(1024); @@ -512,7 +512,7 @@ static inline int hit2rdregs_graph(Graph *g, rdregv *regs, int qlen, kbm_map_t * #if DEBUG if(x + 1 + hit->qb != hit->qe || y + 1 + hit->tb != hit->te){ fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr); - print_hit_kbm(g->kbm, g->kbm->reads->buffer[hit->qidx].tag, g->kbm->reads->buffer[hit->qidx].rdlen, hit, cigars, NULL, stderr); + print_hit_kbm(g->kbm, g->kbm->reads->buffer[hit->qidx].tag, g->kbm->reads->buffer[hit->qidx].bincnt * KBM_BIN_SIZE, hit, cigars, NULL, stderr); abort(); } #endif @@ -693,18 +693,15 @@ if(mdbg->task == 1){ if(reg->closed) continue; //if(g->corr_mode){ if(0){ - //if(map_kbmpoa(mdbg->cc, aux, kbm->reads->buffer[reg->rid].tag, reg->rid, kbm->rdseqs, kbm->reads->buffer[reg->rid].rdoff + UInt(reg->beg) * KBM_BIN_SIZE, UInt(reg->end - reg->beg) * KBM_BIN_SIZE, g->corr_min, g->corr_max, g->corr_cov, NULL) == 0){ - //clear_kbmmapv(aux->hits); - //} } else { - query_index_kbm(aux, NULL, reg->rid, kbm->rdseqs, kbm->reads->buffer[reg->rid].rdoff + UInt(reg->beg) * KBM_BIN_SIZE, UInt(reg->end - reg->beg) * KBM_BIN_SIZE); + query_index_kbm(aux, NULL, reg->rid, kbm->rdseqs, (kbm->reads->buffer[reg->rid].seqoff + UInt(reg->beg)) * KBM_BIN_SIZE, UInt(reg->end - reg->beg) * KBM_BIN_SIZE); map_kbm(aux); } if(raux && aux->hits->size){ // refine kbm_read_t *rd; u4i i, j, tidx; clear_kbm(raux->kbm); - bitpush_kbm(raux->kbm, NULL, 0, kbm->rdseqs->bits, kbm->reads->buffer[reg->rid].rdoff + UInt(reg->beg) * KBM_BIN_SIZE, UInt(reg->end - reg->beg) * KBM_BIN_SIZE); + bitpush_kbm(raux->kbm, NULL, 0, kbm->rdseqs->bits, (kbm->reads->buffer[reg->rid].seqoff + UInt(reg->beg)) * KBM_BIN_SIZE, UInt(reg->end - reg->beg) * KBM_BIN_SIZE); ready_kbm(raux->kbm); simple_index_kbm(raux->kbm, 0, raux->kbm->bins->size); clear_u4v(tidxs); @@ -718,7 +715,7 @@ if(mdbg->task == 1){ for(i=0;isize;i++){ tidx = get_u4v(tidxs, i); rd = ref_kbmreadv(aux->kbm->reads, tidx); - query_index_kbm(raux, rd->tag, tidx, aux->kbm->rdseqs, rd->rdoff, rd->rdlen); + query_index_kbm(raux, rd->tag, tidx, aux->kbm->rdseqs, rd->seqoff * KBM_BIN_SIZE, rd->bincnt * KBM_BIN_SIZE); map_kbm(raux); for(j=0;jhits->size;j++){ flip_hit_kbmaux(aux, raux, j); @@ -1262,9 +1259,9 @@ static inline u8i load_alignments_core(Graph *g, FileReader *pws, int raw, rdreg //if(g->corr_mode){ if(0){ //g->reads->buffer[hit->qidx].corr_bincnt = qlen / KBM_BIN_SIZE; - } else if(qlen != (int)g->kbm->reads->buffer[hit->qidx].rdlen){ + } else if(qlen != (int)g->kbm->reads->buffer[hit->qidx].bincnt * KBM_BIN_SIZE){ if(nwarn < mwarn){ - fprintf(stderr, " -- inconsisitent read length \"%s\" %d != %d in %s -- %s:%d --\n", qtag, qlen, g->kbm->reads->buffer[hit->qidx].rdlen, __FUNCTION__, __FILE__, __LINE__); fflush(stderr); + fprintf(stderr, " -- inconsisitent read length \"%s\" %d != %d in %s -- %s:%d --\n", qtag, qlen, g->kbm->reads->buffer[hit->qidx].bincnt * KBM_BIN_SIZE, __FUNCTION__, __FILE__, __LINE__); fflush(stderr); nwarn ++; } } @@ -1393,12 +1390,12 @@ static inline u8i proc_alignments_core(Graph *g, int ncpu, int raw, rdregv *regs if(g->corr_mode){ mbp = g->genome_size * g->corr_gcov; qb = qe = g->kbm->reads->size / 2; - nbp = g->kbm->reads->buffer[qb].rdlen; + nbp = g->kbm->reads->buffer[qb].bincnt * KBM_BIN_SIZE; while(nbp < mbp && qb && qe + 1 < g->kbm->reads->size){ qb --; qe ++; - nbp += g->kbm->reads->buffer[qb].rdlen; - nbp += g->kbm->reads->buffer[qe].rdlen; + nbp += g->kbm->reads->buffer[qb].bincnt * KBM_BIN_SIZE; + nbp += g->kbm->reads->buffer[qe].bincnt * KBM_BIN_SIZE; } if(qe < g->kbm->reads->size) qe ++; fprintf(KBM_LOGF, "[%s] turn correct-mode on, reads[%u ~ %u = %u] (%llu bp), genome-size=%llu, corr-gcov=%0.2f, corr-dep=[%d,%d,%0.2f]\n", date(), qb, qe, qe - qb, nbp, g->genome_size, g->corr_gcov, g->corr_min, g->corr_max, g->corr_cov); fflush(KBM_LOGF); @@ -1592,7 +1589,7 @@ static inline u8i proc_alignments_core(Graph *g, int ncpu, int raw, rdregv *regs fprintf(KBM_LOGF, "QUERY: %s\t+\t%d\t%d\n", g->kbm->reads->buffer[mdbg->reg.rid].tag, mdbg->reg.beg, mdbg->reg.end); for(i=0;iaux->hits->size;i++){ hit = ref_kbmmapv(mdbg->aux->hits, i); - fprintf(KBM_LOGF, "\t%s\t%c\t%d\t%d\t%d\t%d\t%d\n", g->kbm->reads->buffer[hit->tidx].tag, "+-"[hit->qdir], g->kbm->reads->buffer[hit->tidx].rdlen, hit->tb * KBM_BIN_SIZE, hit->te * KBM_BIN_SIZE, hit->aln * KBM_BIN_SIZE, hit->mat); + fprintf(KBM_LOGF, "\t%s\t%c\t%d\t%d\t%d\t%d\t%d\n", g->kbm->reads->buffer[hit->tidx].tag, "+-"[hit->qdir], g->kbm->reads->buffer[hit->tidx].bincnt * KBM_BIN_SIZE, hit->tb * KBM_BIN_SIZE, hit->te * KBM_BIN_SIZE, hit->aln * KBM_BIN_SIZE, hit->mat); } } mdbg->reg.closed = 1; @@ -1696,9 +1693,9 @@ static inline void build_nodes_graph(Graph *g, u8i maxbp, int ncpu, FileReader * u8i tot, clp; tot = clp = 0; for(rid=0;ridreads->size;rid++){ - tot += g->kbm->reads->buffer[rid].rdlen; + tot += g->kbm->reads->buffer[rid].bincnt * KBM_BIN_SIZE; clp += (g->reads->buffer[rid].clps[1] - g->reads->buffer[rid].clps[0]) * KBM_BIN_SIZE; - fprintf(clplog, "%s\t%d\t%d\t%d\n", g->kbm->reads->buffer[rid].tag, g->kbm->reads->buffer[rid].rdlen, g->reads->buffer[rid].clps[0] * KBM_BIN_SIZE, g->reads->buffer[rid].clps[1] * KBM_BIN_SIZE); + fprintf(clplog, "%s\t%d\t%d\t%d\n", g->kbm->reads->buffer[rid].tag, g->kbm->reads->buffer[rid].bincnt * KBM_BIN_SIZE, g->reads->buffer[rid].clps[0] * KBM_BIN_SIZE, g->reads->buffer[rid].clps[1] * KBM_BIN_SIZE); } fclose(clplog); fprintf(KBM_LOGF, "%.2f%% bases\n", ((tot - clp) * 100.0) / tot); fflush(KBM_LOGF);