Skip to content

Commit 498f128

Browse files
committed
update for empty data
1 parent f3adc53 commit 498f128

File tree

3 files changed

+63
-70
lines changed

3 files changed

+63
-70
lines changed

include/bio/format/sam_input_handler.hpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
107107
void parse_field(vtag_t<field::qname> const & /**/, parsed_field_t & parsed_field)
108108
{
109109
std::string_view raw_field = get<field::qname>(raw_record);
110-
if (raw_field != ".")
110+
if (raw_field != "*")
111111
parse_field_aux(raw_field, parsed_field); //default parsing
112112
}
113113

@@ -124,7 +124,9 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
124124
void parse_field(vtag_t<field::rname> const & /**/, parsed_field_t & parsed_field)
125125
{
126126
std::string_view raw_field = get<field::rname>(raw_record);
127-
parse_field_aux(raw_field, parsed_field);
127+
128+
if (raw_field != "*")
129+
parse_field_aux(raw_field, parsed_field);
128130

129131
// todo insert into header
130132
}
@@ -136,7 +138,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
136138
{
137139
std::string_view raw_field = get<field::cigar>(raw_record);
138140

139-
if (raw_field != ".")
141+
if (raw_field != "*")
140142
{
141143
uint32_t cigar_count{};
142144
char const * ptr = raw_field.data();
@@ -162,10 +164,13 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
162164
{
163165
std::string_view raw_field = get<field::rnext>(raw_record);
164166

165-
if (raw_field == "=")
166-
raw_field = get<field::rname>(raw_record);
167+
if (raw_field != "*")
168+
{
169+
if (raw_field == "=")
170+
raw_field = get<field::rname>(raw_record);
167171

168-
parse_field_aux(raw_field, parsed_field);
172+
parse_field_aux(raw_field, parsed_field);
173+
}
169174
}
170175

171176
/* PNEXT, TLEN are handled correctly by default */
@@ -176,7 +181,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
176181
{
177182
std::string_view raw_field = get<field::seq>(raw_record);
178183

179-
if (raw_field != ".")
184+
if (raw_field != "*")
180185
parse_field_aux(raw_field, parsed_field); // reading into e.g. dna4 vector
181186
}
182187

@@ -186,7 +191,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
186191
{
187192
std::string_view raw_field = get<field::qual>(raw_record);
188193

189-
if (raw_field != ".")
194+
if (raw_field != "*")
190195
parse_field_aux(raw_field, parsed_field); // reading into e.g. dna4 vector
191196
}
192197

@@ -214,7 +219,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
214219
}
215220

216221
std::string header_string;
217-
while (file_it != std::default_sentinel && file_it.peak() == '#')
222+
while (file_it != std::default_sentinel && file_it.peak() == '@')
218223
{
219224
++file_it;
220225
++line;

test/unit/format/sam_file_format_test_template.hpp

Lines changed: 48 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -150,20 +150,11 @@ struct sam_file_read : public sam_file_data
150150

151151
TYPED_TEST_SUITE_P(sam_file_read);
152152

153-
// ----------------------------------------------------------------------------
154-
// general
155-
// ----------------------------------------------------------------------------
156-
157-
// TYPED_TEST_P(sam_file_read, input_concept)
158-
// {
159-
// EXPECT_TRUE((seqan3::sam_file_input_format<TypeParam>));
160-
// }
161-
162153
// ----------------------------------------------------------------------------
163154
// sam_file_read
164155
// ----------------------------------------------------------------------------
165156

166-
TYPED_TEST_P(sam_file_read, read_in_all_data)
157+
TYPED_TEST_P(sam_file_read, full_data_set)
167158
{
168159
// prepare tag dictionary
169160
this->tag_dicts[0]["NM"_tag] = -7;
@@ -210,36 +201,54 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
210201
}
211202
}
212203

213-
// TYPED_TEST_P(sam_file_read, read_in_all_but_empty_data)
214-
// {
215-
// typename TestFixture::stream_type istream{this->empty_input};
216-
// seqan3::sam_file_input fin{istream, this->ref_ids, this->ref_sequences, TypeParam{}};
217-
218-
// EXPECT_TRUE((*fin.begin()).sequence().empty());
219-
// EXPECT_TRUE((*fin.begin()).id().empty());
220-
// EXPECT_TRUE((*fin.begin()).base_qualities().empty());
221-
// EXPECT_EQ((*fin.begin()).sequence_position(), 0);
222-
// EXPECT_TRUE(!(*fin.begin()).reference_id().has_value());
223-
// EXPECT_TRUE(!(*fin.begin()).reference_position().has_value());
224-
// EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment())));
225-
// EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment())));
226-
// EXPECT_EQ((*fin.begin()).flag(), seqan3::sam_flag{0u});
227-
// EXPECT_EQ((*fin.begin()).mapping_quality(), 0u);
228-
// EXPECT_TRUE(!(*fin.begin()).mate_reference_id().has_value());
229-
// EXPECT_TRUE(!(*fin.begin()).mate_position().has_value());
230-
// EXPECT_EQ((*fin.begin()).template_length(), int32_t{});
231-
// EXPECT_TRUE((*fin.begin()).tags().empty());
232-
// }
204+
TYPED_TEST_P(sam_file_read, all_missing_data)
205+
{
206+
typename TestFixture::stream_type istream{this->empty_input};
233207

234-
// TYPED_TEST_P(sam_file_read, read_in_almost_nothing)
235-
// {
236-
// typename TestFixture::stream_type istream{this->simple_three_reads_input};
237-
// seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields<seqan3::field::mapq>{}};
208+
using record_t =
209+
bio::detail::record_from_typelist<std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>,
210+
std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>>;
211+
bio::map_io::reader_options default_options{};
212+
bio::format_input_handler<bio::sam> input_handler{istream, default_options};
213+
record_t rec;
238214

239-
// size_t i{0};
240-
// for (auto & [mapq] : fin)
241-
// EXPECT_EQ(mapq, this->mapqs[i++]);
242-
// }
215+
input_handler.parse_next_record_into(rec);
216+
217+
EXPECT_TRUE(rec.id().empty());
218+
EXPECT_TRUE(rec.rname().empty());
219+
EXPECT_TRUE(rec.rnext().empty());
220+
EXPECT_TRUE(rec.cigar().empty());
221+
EXPECT_TRUE(rec.seq().empty());
222+
EXPECT_TRUE(rec.qual().empty());
223+
// EXPECT_TRUE(rec.tags().empty()) << tags;
224+
225+
EXPECT_EQ(rec.flag(), bio::map_io::sam_flag{0u});
226+
EXPECT_EQ(rec.pos(), 0);
227+
EXPECT_EQ(rec.pnext(), 0);
228+
EXPECT_EQ(rec.mapq(), 0u);
229+
EXPECT_EQ(rec.tlen(), 0);
230+
}
231+
232+
TYPED_TEST_P(sam_file_read, select_fields)
233+
{
234+
typename TestFixture::stream_type istream{this->empty_input};
235+
236+
constexpr auto fid = bio::vtag<bio::field::rname, bio::field::pos>;
237+
constexpr auto ftype = bio::ttag<std::string_view, int64_t>;
238+
239+
using record_t =
240+
bio::detail::record_from_typelist<std::remove_cvref_t<decltype(fid)>, std::remove_cvref_t<decltype(ftype)>>;
241+
bio::map_io::reader_options default_options{};
242+
bio::format_input_handler<bio::sam> input_handler{istream, default_options};
243+
record_t rec;
244+
245+
for (unsigned i = 0; i < 3; ++i)
246+
{
247+
input_handler.parse_next_record_into(rec);
248+
EXPECT_EQ(rec.rname(), this->ref_id);
249+
EXPECT_EQ(rec.pos(), this->positions[i]);
250+
}
251+
}
243252

244253
// TYPED_TEST_P(sam_file_read, read_in_alignment_only_with_ref)
245254
// {
@@ -327,27 +336,6 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
327336
// }
328337
// }
329338

330-
// TYPED_TEST_P(sam_file_read, cigar_vector)
331-
// {
332-
// std::vector<std::vector<seqan3::cigar>> expected
333-
// {
334-
// {{1, 'S'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'D'_cigar_operation}, {1, 'M'_cigar_operation},
335-
// {1, 'I'_cigar_operation}},
336-
// {{1, 'H'_cigar_operation}, {7, 'M'_cigar_operation}, {1, 'D'_cigar_operation}, {1, 'M'_cigar_operation},
337-
// {1, 'S'_cigar_operation}, {2, 'H'_cigar_operation}},
338-
// {{1, 'S'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'P'_cigar_operation}, {1, 'M'_cigar_operation},
339-
// {1, 'I'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'I'_cigar_operation}, {1, 'D'_cigar_operation},
340-
// {1, 'M'_cigar_operation}, {1, 'S'_cigar_operation}}
341-
// };
342-
343-
// typename TestFixture::stream_type istream{this->simple_three_reads_input};
344-
// seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields<seqan3::field::cigar>{}};
345-
346-
// size_t i{0};
347-
// for (auto & [cigar_v] : fin)
348-
// EXPECT_EQ(cigar_v, expected[i++]);
349-
// }
350-
351339
// TYPED_TEST_P(sam_file_read, format_error_ref_id_not_in_reference_information)
352340
// {
353341
// { // with reference information given
@@ -672,7 +660,7 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
672660
// seqan3::format_error);
673661
// }
674662

675-
REGISTER_TYPED_TEST_SUITE_P(sam_file_read, read_in_all_data);
663+
REGISTER_TYPED_TEST_SUITE_P(sam_file_read, full_data_set, all_missing_data, select_fields);
676664

677665
// REGISTER_TYPED_TEST_SUITE_P(sam_file_read,
678666
// input_concept,

test/unit/format/sam_input_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
6767
"\tbH:H:1AE301\n"
6868
"read3\t43\tref\t3\t63\t1S1M1P1M1I1M1I1D1M1S\tref\t10\t300\tGGAGTATA\t!!*+,-./\n"};
6969

70-
std::string empty_input{"@HD\tVN:1.6\n@SQ\tSN:ref\tLN:34\n*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"};
70+
std::string empty_input{"*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"};
7171

7272
std::string empty_cigar{"read1\t41\tref\t1\t61\t*\tref\t10\t300\tACGT\t!##$\n"};
7373

0 commit comments

Comments
 (0)