Skip to content

Commit 2b5ae3f

Browse files
Add nucleotide-count exercise (#286)
1 parent b2bcdef commit 2b5ae3f

File tree

8 files changed

+278
-0
lines changed

8 files changed

+278
-0
lines changed

config.json

+9
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,15 @@
333333
"math"
334334
]
335335
},
336+
{
337+
"slug": "nucleotide-count",
338+
"name": "Nucleotide Count",
339+
"uuid": "4ce578b2-9cfb-498b-947f-79f97abeb224",
340+
"practices": [],
341+
"prerequisites": [],
342+
"difficulty": 2,
343+
"topics": []
344+
},
336345
{
337346
"slug": "eliuds-eggs",
338347
"name": "Eliud's Eggs",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Instructions
2+
3+
Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
4+
All known life depends on DNA!
5+
6+
> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.
7+
8+
DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
9+
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
10+
We call the order of these nucleotides in a bit of DNA a "DNA sequence".
11+
12+
We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
13+
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.
14+
15+
Given a string representing a DNA sequence, count how many of each nucleotide is present.
16+
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.
17+
18+
For example:
19+
20+
```text
21+
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
22+
"INVALID" -> error
23+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"authors": [
3+
"keiravillekode"
4+
],
5+
"files": {
6+
"solution": [
7+
"nucleotide-count.sml"
8+
],
9+
"test": [
10+
"test.sml"
11+
],
12+
"example": [
13+
".meta/example.sml"
14+
]
15+
},
16+
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
17+
"source": "The Calculating DNA Nucleotides_problem at Rosalind",
18+
"source_url": "https://rosalind.info/problems/dna/"
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
(* Counts how often each of the nucleotides A, C, G and T occurs in `strand`.
   Raises `Fail "Invalid nucleotide in strand"` at the first character that is
   not one of those four. *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
  let
    (* Folds one character into the running totals. *)
    fun tally (#"A", {a, c, g, t}) = {a = a + 1, c = c, g = g, t = t}
      | tally (#"C", {a, c, g, t}) = {a = a, c = c + 1, g = g, t = t}
      | tally (#"G", {a, c, g, t}) = {a = a, c = c, g = g + 1, t = t}
      | tally (#"T", {a, c, g, t}) = {a = a, c = c, g = g, t = t + 1}
      | tally _ = raise Fail "Invalid nucleotide in strand"
  in
    (* foldl walks the characters left to right, so the first invalid
       character is the one that triggers the exception. *)
    foldl tally {a = 0, c = 0, g = 0, t = 0} (explode strand)
  end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# This is an auto-generated file.
2+
#
3+
# Regenerating this file via `configlet sync` will:
4+
# - Recreate every `description` key/value pair
5+
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
6+
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
7+
# - Preserve any other key/value pair
8+
#
9+
# As user-added comments (using the # character) will be removed when this file
10+
# is regenerated, comments can be added via a `comment` key.
11+
12+
[3e5c30a8-87e2-4845-a815-a49671ade970]
13+
description = "empty strand"
14+
15+
[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
16+
description = "can count one nucleotide in single-character input"
17+
18+
[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
19+
description = "strand with repeated nucleotide"
20+
21+
[40a45eac-c83f-4740-901a-20b22d15a39f]
22+
description = "strand with multiple nucleotides"
23+
24+
[b4c47851-ee9e-4b0a-be70-a86e343bd851]
25+
description = "strand with invalid nucleotides"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(* Exercise stub: should count how many times each nucleotide occurs in
   `strand` and return the totals as a record with fields a, c, g and t.
   As written it always raises `Fail`. *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
2+
raise Fail "'nucleotideCounts' is not implemented"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
(* version 1.0.0 *)
2+
3+
use "testlib.sml";
4+
use "nucleotide-count.sml";
5+
6+
(* Forward application: `x |> f` evaluates to `f x`.
   The operator is declared right-associative (`infixr`). *)
infixr |>
fun op|> (x, f) = f x
8+
9+
(* Test suite for `nucleotideCounts`: four strands with their expected counts,
   followed by one strand that must be rejected with `Fail`. *)
val testsuite =
  let
    (* Builds a test asserting that counting `strand` yields `expected`. *)
    fun counts (descr, strand, expected) =
      test descr (fn _ => nucleotideCounts strand |> Expect.equalTo expected)
  in
    describe "nucleotide-count"
      (map counts [
         ("empty strand",
          "",
          {a = 0, c = 0, g = 0, t = 0}),

         ("can count one nucleotide in single-character input",
          "G",
          {a = 0, c = 0, g = 1, t = 0}),

         ("strand with repeated nucleotide",
          "GGGGGGG",
          {a = 0, c = 0, g = 7, t = 0}),

         ("strand with multiple nucleotides",
          "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC",
          {a = 20, c = 12, g = 17, t = 21})
       ] @ [
         test "strand with invalid nucleotides"
           (fn _ => (fn _ => nucleotideCounts "AGXXACT") |> Expect.error (Fail "Invalid nucleotide in strand"))
       ])
  end

(* Runs the suite and prints the report. *)
val _ = Test.run testsuite
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
(* Assertion helpers for test thunks.  Every helper returns an `expectation`:
   `Pass`, or `Fail (expected, actual)` carrying two ready-to-print lines. *)
structure Expect =
struct
  (* Inside this structure `Fail` names this constructor, not the Basis
     exception of the same name. *)
  datatype expectation = Pass | Fail of string * string

  local
    (* Failure for a value mismatch: `b` is the expected text, `a` the actual. *)
    fun failEq b a =
      Fail ("Expected: " ^ b, "Got: " ^ a)

    (* Failure for an exception mismatch: `b` is the expected text, `a` what
       was actually raised. *)
    fun failExn b a =
      Fail ("Expected: " ^ b, "Raised: " ^ a)
  in
    (* Passes when `a` is true. *)
    fun truthy a =
      if a
      then Pass
      else failEq "true" "false"

    (* Passes when `a` is false. *)
    fun falsy a =
      if a
      then failEq "false" "true"
      else Pass

    (* Passes when the actual value `a` equals the expected value `b`.
       The failure text is rendered with `PolyML.makestring`. *)
    fun equalTo b a =
      if a = b
      then Pass
      else failEq (PolyML.makestring b) (PolyML.makestring a)

    (* Passes when the real `a` is within a relative tolerance `delta` of `b`,
       measured against either |a| or |b|. *)
    fun nearTo delta b a =
      if Real.abs (a - b) <= delta * Real.abs a orelse
         Real.abs (a - b) <= delta * Real.abs b
      then Pass
      else failEq (Real.toString b ^ " +/- " ^ Real.toString delta) (Real.toString a)

    (* Passes when calling `f ()` raises any exception at all. *)
    fun anyError f =
      (
        f ();
        failExn "an exception" "Nothing"
      ) handle _ => Pass

    (* Passes when calling `f ()` raises an exception whose `exnMessage`
       equals that of `e`.  Both failure paths describe the expected exception
       with `exnMessage e`, so the report keeps the expected message whether
       nothing was raised or the wrong exception was raised. *)
    fun error e f =
      (
        f ();
        failExn (exnMessage e) "Nothing"
      ) handle e' => if exnMessage e' = exnMessage e
                     then Pass
                     else failExn (exnMessage e) (exnMessage e')
  end
end
50+
51+
(* Terminal colouring for the test report, using escape sequences. *)
structure TermColor =
struct
  datatype color = Red | Green | Yellow | Normal

  (* Escape sequence that switches the terminal to the given colour;
     `Normal` is the reset sequence. *)
  fun f color =
    case color of
      Red    => "\027[31m"
    | Green  => "\027[32m"
    | Yellow => "\027[33m"
    | Normal => "\027[0m"

  (* Wraps `s` in the escape for `color`, followed by the reset escape. *)
  fun colorize color s = String.concat [f color, s, f Normal]

  fun redit s = colorize Red s

  fun greenit s = colorize Green s

  fun yellowit s = colorize Yellow s
end
68+
69+
(* Test tree and runner.  A tree is either a named group of nodes or a single
   named test whose thunk yields an `Expect.expectation`. *)
structure Test =
struct
  datatype testnode = TestGroup of string * testnode list
                    | Test of string * (unit -> Expect.expectation)

  local
    (* Outcome of running one test: passed, failed an expectation
       (description, expected line, actual line), or raised unexpectedly
       (description, reason). *)
    datatype evaluation = Success of string
                        | Failure of string * string * string
                        | Error of string * string

    (* Prefixes `s` with `n` spaces. *)
    fun indent n s =
      let
        val padding = implode (List.tabulate (n, fn _ => #" "))
      in
        padding ^ s
      end

    (* Renders one evaluation at nesting level `indentlvl`: a marked heading
       line, then any detail lines indented two further columns. *)
    fun fmt indentlvl ev =
      let
        val outer = indentlvl * 2
        val inner = outer + 2
        (* UTF-8 bytes of U+2714 and U+2716, each followed by a space. *)
        val check = TermColor.greenit "\226\156\148 "
        val cross = TermColor.redit "\226\156\150 "

        fun report mark descr details =
          String.concatWith "\n"
            (indent outer (mark ^ descr) :: map (indent inner) details)
      in
        case ev of
          Success descr => report check descr []
        | Failure (descr, exp, got) => report cross descr [exp, got]
        | Error (descr, reason) => report cross descr [TermColor.redit reason]
      end

    (* Runs a single test and returns its (passed, failed, errored) counter
       together with the evaluation.  Any exception escaping the thunk is
       recorded as an error rather than propagated. *)
    fun eval node =
      case node of
        TestGroup _ => raise Fail "Only a 'Test' can be evaluated"
      | Test (descr, thunk) =>
          (
            (case thunk () of
               Expect.Pass => ((1, 0, 0), Success descr)
             | Expect.Fail (expected, actual) =>
                 ((0, 1, 0), Failure (descr, expected, actual)))
            handle e => ((0, 0, 1), Error (descr, "Unexpected error: " ^ exnMessage e))
          )

    (* Walks the tree, returning the summed counters and the report lines in
       display order.  Children are folded from the right, so the last child
       of a group is evaluated first. *)
    fun flatten depth testnode =
      case testnode of
        Test _ =>
          let
            val (counter, evaluation) = eval testnode
          in
            (counter, [fmt depth evaluation])
          end
      | TestGroup (descr, children) =>
          let
            fun step (child, ((passed, failed, errored), collected)) =
              let
                val ((passed', failed', errored'), lines) = flatten (depth + 1) child
              in
                ((passed' + passed, failed' + failed, errored' + errored),
                 lines :: collected)
              end

            val (counter, nested) = foldr step ((0, 0, 0), []) children
          in
            (counter, indent (depth * 2) descr :: List.concat nested)
          end

    fun println s = print (s ^ "\n")
  in
    (* Evaluates the whole suite, prints the report and a summary line, then
       exits the process: success only if nothing failed or errored. *)
    fun run suite =
      let
        val ((succeeded, failed, errored), texts) = flatten 0 suite
        val total = succeeded + failed + errored

        val summary = String.concatWith ", " [
          TermColor.greenit (Int.toString succeeded ^ " passed"),
          TermColor.redit (Int.toString failed ^ " failed"),
          TermColor.redit (Int.toString errored ^ " errored"),
          Int.toString total ^ " total"
        ]

        val status = if failed <> 0 orelse errored <> 0
                     then OS.Process.failure
                     else OS.Process.success
      in
        List.app println texts;
        println "";
        println ("Tests: " ^ summary);
        OS.Process.exit status
      end
  end
end
158+
159+
(* Builds a group node named `label` that contains the nodes in `members`. *)
fun describe label members =
  Test.TestGroup (label, members)
160+
(* Builds a single test node named `label` whose outcome is produced by `body`. *)
fun test label body =
  Test.Test (label, body)

0 commit comments

Comments
 (0)