Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/lr-giraffe' into lr-giraffe
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Aug 22, 2024
2 parents b99200a + 292bdd4 commit bd8ded6
Show file tree
Hide file tree
Showing 14 changed files with 3,759 additions and 2,448 deletions.
16 changes: 8 additions & 8 deletions src/algorithms/chain_items.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ class Anchor {
/// Get the distance-finding hint information (i.e. "zip code") for
/// accelerating distance queries to the start of this anchor, or null if
/// none is set.
inline ZipCodeDecoder* start_hint() const {
return start_decoder;
inline ZipCode* start_hint() const {
return start_zip;
}

/// Get the graph distance from wherever the start hint is positioned back
Expand All @@ -120,8 +120,8 @@ class Anchor {
/// Get the distance-finding hint information (i.e. "zip code") for
/// accelerating distance queries from the end of this anchor, or null if
/// none is set.
inline ZipCodeDecoder* end_hint() const {
return end_decoder;
inline ZipCode* end_hint() const {
return end_zip;
}

/// Get the graph distance from wherever the end hint is positioned forward
Expand All @@ -142,14 +142,14 @@ class Anchor {

/// Compose a read start position, graph start position, and match length into an Anchor.
/// Can also bring along a distance hint and a seed number.
inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits<size_t>::max(), ZipCodeDecoder* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_decoder(hint), end_decoder(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) {
inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits<size_t>::max(), ZipCode* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_zip(hint), end_zip(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) {
// Nothing to do!
}

/// Compose two Anchors into an Anchor that represents coming in through
/// the first one and going out through the second, like a tunnel. Useful
/// for representing chains as chainable items.
inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_decoder(first.start_hint()), end_decoder(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) {
inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_zip(first.start_hint()), end_zip(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) {
// Nothing to do!
}

Expand All @@ -170,8 +170,8 @@ class Anchor {
int points;
size_t start_seed;
size_t end_seed;
ZipCodeDecoder* start_decoder;
ZipCodeDecoder* end_decoder;
ZipCode* start_zip;
ZipCode* end_zip;
size_t start_offset;
size_t end_offset;
size_t seed_length;
Expand Down
4 changes: 2 additions & 2 deletions src/minimizer_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3751,6 +3751,7 @@ std::vector<MinimizerMapper::Seed> MinimizerMapper::find_seeds(const std::vector
if (minimizer.occs[j].payload == MIPayload::NO_CODE) {
//If the zipcode wasn't saved, then calculate it
seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit);
seeds.back().zipcode.fill_in_full_decoder();
} else if (minimizer.occs[j].payload.first == 0) {
//If the minimizer stored the index into a list of zipcodes
if (!this->zipcodes->empty()) {
Expand All @@ -3759,13 +3760,12 @@ std::vector<MinimizerMapper::Seed> MinimizerMapper::find_seeds(const std::vector
} else {
//If we don't have the oversized payloads, then fill in the zipcode using the pos
seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit);
seeds.back().zipcode.fill_in_full_decoder();
}
} else {
//If the zipcode was saved in the payload
seeds.back().zipcode.fill_in_zipcode_from_payload(minimizer.occs[j].payload);
}
ZipCodeDecoder* decoder = new ZipCodeDecoder(&seeds.back().zipcode);
seeds.back().zipcode_decoder.reset(decoder);

}

Expand Down
8 changes: 4 additions & 4 deletions src/minimizer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,15 +601,15 @@ class MinimizerMapper : public AlignerClient {

/// How do we convert chain info to an actual seed of the type we are using?
/// Also needs to know the hit position, and the minimizer number.
inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip, ZipCodeDecoder* decoder) {
return { hit, minimizer, zip, std::unique_ptr<ZipCodeDecoder>(decoder)};
inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip) {
return { hit, minimizer, zip};
}

/// Convert a collection of seeds to a collection of chaining anchors.
std::vector<algorithms::Anchor> to_anchors(const Alignment& aln, const VectorView<Minimizer>& minimizers, const std::vector<Seed>& seeds) const;
std::vector<algorithms::Anchor> to_anchors(const Alignment& aln, const VectorView<Minimizer>& minimizers, std::vector<Seed>& seeds) const;

/// Convert a single seed to a single chaining anchor.
static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView<Minimizer>& minimizers, const std::vector<Seed>& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner);
static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView<Minimizer>& minimizers, std::vector<Seed>& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner);

/// Convert a read region, and the seeds that that region covers the
/// stapled bases of (sorted by stapled base), into a single chaining
Expand Down
46 changes: 23 additions & 23 deletions src/minimizer_mapper_from_chains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,26 +91,26 @@ static pos_t forward_pos(const MinimizerMapper::Seed& seed, const VectorView<Min
static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1, const MinimizerMapper::Seed& end_seed1,
const MinimizerMapper::Seed& start_seed2, const MinimizerMapper::Seed& end_seed2) {
#ifdef debug
assert(start_seed1.zipcode_decoder->get_distance_index_address(0) ==
end_seed1.zipcode_decoder->get_distance_index_address(0));
assert(start_seed2.zipcode_decoder->get_distance_index_address(0) ==
end_seed2.zipcode_decoder->get_distance_index_address(0));
assert(start_seed1.zipcode.get_distance_index_address(0) ==
end_seed1.zipcode.get_distance_index_address(0));
assert(start_seed2.zipcode.get_distance_index_address(0) ==
end_seed2.zipcode.get_distance_index_address(0));
#endif
if (start_seed1.zipcode_decoder->get_distance_index_address(0) !=
start_seed2.zipcode_decoder->get_distance_index_address(0)) {
if (start_seed1.zipcode.get_distance_index_address(0) !=
start_seed2.zipcode.get_distance_index_address(0)) {
//If the two ranges are on different connected components
return false;
}
if (start_seed1.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_SNARL) {
if (start_seed1.zipcode.get_code_type(0) == ZipCode::ROOT_SNARL) {
//If this is in a root snarl
if (start_seed1.zipcode_decoder->get_rank_in_snarl(1) !=
start_seed2.zipcode_decoder->get_rank_in_snarl(1)
if (start_seed1.zipcode.get_rank_in_snarl(1) !=
start_seed2.zipcode.get_rank_in_snarl(1)
||
start_seed1.zipcode_decoder->get_rank_in_snarl(1) !=
end_seed1.zipcode_decoder->get_rank_in_snarl(1)
start_seed1.zipcode.get_rank_in_snarl(1) !=
end_seed1.zipcode.get_rank_in_snarl(1)
||
start_seed2.zipcode_decoder->get_rank_in_snarl(1) !=
end_seed2.zipcode_decoder->get_rank_in_snarl(1)) {
start_seed2.zipcode.get_rank_in_snarl(1) !=
end_seed2.zipcode.get_rank_in_snarl(1)) {
//If the two ranges are on different children of the snarl
return false;
}
Expand All @@ -119,20 +119,20 @@ static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1
//Get the offset used for determining the range
//On the top-level chain, node, or child of the top-level snarl
auto get_seed_offset = [&] (const MinimizerMapper::Seed& seed) {
if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_CHAIN) {
return seed.zipcode_decoder->get_offset_in_chain(1);
} else if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_NODE) {
return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(0) - offset(seed.pos)
if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN) {
return seed.zipcode.get_offset_in_chain(1);
} else if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_NODE) {
return is_rev(seed.pos) ? seed.zipcode.get_length(0) - offset(seed.pos)
: offset(seed.pos);
} else {
//Otherwise, this is a top-level snarl, and we've already made sure that it's on the
//same child chain/node
if (seed.zipcode_decoder->get_code_type(1) == ZipCode::CHAIN) {
if (seed.zipcode.get_code_type(1) == ZipCode::CHAIN) {
//On a chain
return seed.zipcode_decoder->get_offset_in_chain(2);
return seed.zipcode.get_offset_in_chain(2);
} else {
//On a node
return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(1) - offset(seed.pos)
return is_rev(seed.pos) ? seed.zipcode.get_length(1) - offset(seed.pos)
: offset(seed.pos);
}
}
Expand Down Expand Up @@ -3964,7 +3964,7 @@ WFAAlignment MinimizerMapper::connect_consistently(const std::string& sequence,
return result;
}

std::vector<algorithms::Anchor> MinimizerMapper::to_anchors(const Alignment& aln, const VectorView<Minimizer>& minimizers, const std::vector<Seed>& seeds) const {
std::vector<algorithms::Anchor> MinimizerMapper::to_anchors(const Alignment& aln, const VectorView<Minimizer>& minimizers, std::vector<Seed>& seeds) const {
std::vector<algorithms::Anchor> to_return;
to_return.reserve(seeds.size());
for (size_t i = 0; i < seeds.size(); i++) {
Expand All @@ -3973,7 +3973,7 @@ std::vector<algorithms::Anchor> MinimizerMapper::to_anchors(const Alignment& aln
return to_return;
}

algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView<Minimizer>& minimizers, const std::vector<Seed>& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) {
algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView<Minimizer>& minimizers, std::vector<Seed>& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) {
// Turn each seed into the part of its match on the node where the
// anchoring end (start for forward-strand minimizers, end for
// reverse-strand minimizers) falls.
Expand Down Expand Up @@ -4031,7 +4031,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector
// TODO: Always make sequence and quality available for scoring!
// We're going to score the anchor as the full minimizer, and rely on the margins to stop us from taking overlapping anchors.
int score = aligner->score_exact_match(aln, read_start - margin_left, length + margin_right);
return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, seed.zipcode_decoder.get(), hint_start);
return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, &(seed.zipcode), hint_start);
}

algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, size_t read_start, size_t read_end, const std::vector<size_t>& sorted_seeds, const std::vector<algorithms::Anchor>& seed_anchors, const std::vector<size_t>::const_iterator& mismatch_begin, const std::vector<size_t>::const_iterator& mismatch_end, const HandleGraph& graph, const Aligner* aligner) {
Expand Down
Loading

1 comment on commit bd8ded6

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch lr-giraffe. View the full report here.

15 tests passed, 1 test failed and 0 tests skipped in 16185 seconds

Failed tests:

  • test_sim_yeast_cactus (148 seconds)

Please sign in to comment.