Skip to content

Commit 876bbb8

Browse files
committed
test AI generated tests
1 parent 295d37e commit 876bbb8

File tree

10 files changed

+2646
-66
lines changed

10 files changed

+2646
-66
lines changed

tests/test_cat.py

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
"""Tests for catsim.cat module."""
2+
3+
import numpy as np
4+
import pytest
5+
6+
from catsim import cat
7+
from catsim.item_bank import ItemBank
8+
9+
10+
class TestDodd:
    """Tests for the dodd() function.

    Each test builds its own 10-item bank via ItemBank.generate_item_bank().
    The starting theta of the directional tests is derived from that bank's
    difficulty bounds, so the expected direction of movement does not depend
    on which difficulties happened to be generated.
    """

    def test_dodd_correct_response(self) -> None:
        """Test dodd heuristic when examinee answers correctly."""
        item_bank = ItemBank.generate_item_bank(10)
        # Start one unit below the hardest item: with the update rule
        # theta + (b_max - theta) / 2 this guarantees a strictly upward
        # move, whereas a fixed theta of 0.0 only moves up when b_max > 0.
        theta = max(item_bank.difficulty) - 1.0
        new_theta = cat.dodd(theta, item_bank, correct=True)
        # When correct, theta should increase toward max difficulty
        assert new_theta > theta

    def test_dodd_incorrect_response(self) -> None:
        """Test dodd heuristic when examinee answers incorrectly."""
        item_bank = ItemBank.generate_item_bank(10)
        # Start one unit above the easiest item: with the update rule
        # theta - (theta - b_min) / 2 this guarantees a strictly downward
        # move, whereas a fixed theta of 0.0 only moves down when b_min < 0.
        theta = min(item_bank.difficulty) + 1.0
        new_theta = cat.dodd(theta, item_bank, correct=False)
        # When incorrect, theta should decrease toward min difficulty
        assert new_theta < theta

    def test_dodd_correct_formula(self) -> None:
        """Test that dodd follows the correct formula."""
        item_bank = ItemBank.generate_item_bank(10)
        theta = 0.5
        b = item_bank.difficulty
        b_max = max(b)
        b_min = min(b)

        # Correct response: move halfway from theta to the hardest item
        expected_correct = theta + ((b_max - theta) / 2)
        assert cat.dodd(theta, item_bank, correct=True) == pytest.approx(expected_correct)

        # Incorrect response: move halfway from theta to the easiest item
        expected_incorrect = theta - ((theta - b_min) / 2)
        assert cat.dodd(theta, item_bank, correct=False) == pytest.approx(expected_incorrect)

    def test_dodd_at_max_difficulty(self) -> None:
        """Test dodd when theta equals max difficulty."""
        item_bank = ItemBank.generate_item_bank(10)
        b_max = max(item_bank.difficulty)
        # When theta = b_max and correct, result should equal theta (no movement)
        new_theta = cat.dodd(b_max, item_bank, correct=True)
        assert new_theta == pytest.approx(b_max)

    def test_dodd_at_min_difficulty(self) -> None:
        """Test dodd when theta equals min difficulty."""
        item_bank = ItemBank.generate_item_bank(10)
        b_min = min(item_bank.difficulty)
        # When theta = b_min and incorrect, result should equal theta (no movement)
        new_theta = cat.dodd(b_min, item_bank, correct=False)
        assert new_theta == pytest.approx(b_min)
60+
61+
62+
class TestBias:
    """Checks for cat.bias() on list and numpy inputs, plus its size validation."""

    def test_bias_zero(self) -> None:
        """Equal actual and predicted values yield a bias of zero."""
        truth = [1.0, 2.0, 3.0]
        estimates = list(truth)
        result = cat.bias(truth, estimates)
        assert result == pytest.approx(0.0)

    def test_bias_positive(self) -> None:
        """Predictions one unit above the actual values yield a bias of +1."""
        truth = [0.0] * 3
        estimates = [1.0] * 3
        result = cat.bias(truth, estimates)
        assert result == pytest.approx(1.0)

    def test_bias_negative(self) -> None:
        """Predictions one unit below the actual values yield a bias of -1."""
        truth = [1.0] * 3
        estimates = [0.0] * 3
        result = cat.bias(truth, estimates)
        assert result == pytest.approx(-1.0)

    def test_bias_mixed(self) -> None:
        """Over- and underestimates that cancel out yield a bias of zero."""
        truth = [0.0] * 4
        estimates = [1.0, -1.0, 2.0, -2.0]
        # The differences 1, -1, 2 and -2 average to 0.
        result = cat.bias(truth, estimates)
        assert result == pytest.approx(0.0)

    def test_bias_with_numpy_arrays(self) -> None:
        """numpy arrays are accepted; a constant +0.5 offset yields 0.5."""
        truth = np.array([1.0, 2.0, 3.0])
        estimates = np.array([1.5, 2.5, 3.5])
        result = cat.bias(truth, estimates)
        assert result == pytest.approx(0.5)

    def test_bias_different_sizes_raises(self) -> None:
        """Inputs of different lengths raise ValueError mentioning 'same size'."""
        three_values = [1.0, 2.0, 3.0]
        two_values = [1.0, 2.0]
        with pytest.raises(ValueError, match="same size"):
            cat.bias(three_values, two_values)
102+
103+
104+
class TestMse:
    """Checks for cat.mse() on list and numpy inputs, plus its size validation."""

    def test_mse_zero(self) -> None:
        """Equal actual and predicted values yield an MSE of zero."""
        truth = [1.0, 2.0, 3.0]
        estimates = list(truth)
        result = cat.mse(truth, estimates)
        assert result == pytest.approx(0.0)

    def test_mse_positive(self) -> None:
        """Errors of 1, 2 and 3 yield an MSE of 14/3."""
        truth = [0.0] * 3
        estimates = [1.0, 2.0, 3.0]
        # Squared errors are 1, 4 and 9; their mean is 14 / 3.
        expected = 14.0 / 3.0
        assert cat.mse(truth, estimates) == pytest.approx(expected)

    def test_mse_negative_differences(self) -> None:
        """Swapping the roles of the inputs leaves the MSE unchanged at 14/3."""
        truth = [1.0, 2.0, 3.0]
        estimates = [0.0] * 3
        # The sign of each difference disappears once it is squared.
        expected = 14.0 / 3.0
        assert cat.mse(truth, estimates) == pytest.approx(expected)

    def test_mse_with_numpy_arrays(self) -> None:
        """numpy arrays are accepted; unit errors yield an MSE of 1."""
        truth = np.array([0.0, 0.0])
        estimates = np.array([1.0, 1.0])
        result = cat.mse(truth, estimates)
        assert result == pytest.approx(1.0)

    def test_mse_different_sizes_raises(self) -> None:
        """Inputs of different lengths raise ValueError mentioning 'same size'."""
        three_values = [1.0, 2.0, 3.0]
        two_values = [1.0, 2.0]
        with pytest.raises(ValueError, match="same size"):
            cat.mse(three_values, two_values)
139+
140+
141+
class TestRmse:
    """Checks for cat.rmse(), including its relationship to cat.mse()."""

    def test_rmse_zero(self) -> None:
        """Equal actual and predicted values yield an RMSE of zero."""
        truth = [1.0, 2.0, 3.0]
        estimates = list(truth)
        result = cat.rmse(truth, estimates)
        assert result == pytest.approx(0.0)

    def test_rmse_is_sqrt_of_mse(self) -> None:
        """cat.rmse() matches the square root of cat.mse() on the same data."""
        truth = [0.0] * 3
        estimates = [1.0, 2.0, 3.0]
        # Evaluate mse before rmse, in the same order on every run.
        expected = np.sqrt(cat.mse(truth, estimates))
        observed = cat.rmse(truth, estimates)
        assert observed == pytest.approx(expected)

    def test_rmse_unit_difference(self) -> None:
        """Unit errors on every element yield an RMSE of 1."""
        truth = [0.0] * 2
        estimates = [1.0] * 2
        # sqrt((1 + 1) / 2) == 1
        result = cat.rmse(truth, estimates)
        assert result == pytest.approx(1.0)

    def test_rmse_different_sizes_raises(self) -> None:
        """Inputs of different lengths raise ValueError mentioning 'same size'."""
        three_values = [1.0, 2.0, 3.0]
        two_values = [1.0, 2.0]
        with pytest.raises(ValueError, match="same size"):
            cat.rmse(three_values, two_values)
171+
172+
173+
class TestOverlapRate:
    """Checks for cat.overlap_rate(): expected values and argument validation.

    The expected values follow the formula used by these tests,
    T = (N / Q) * var(r) + (Q / N), where N is the number of exposure rates
    (bank size), Q the test size and r the exposure rates.
    """

    def test_overlap_rate_uniform_exposure(self) -> None:
        """Identical exposure rates have zero variance, so T reduces to Q / N."""
        rates = np.array([0.3, 0.3, 0.3, 0.3, 0.3])
        n_administered = 3
        n_items = len(rates)
        # 3 / 5 == 0.6
        expected = n_administered / n_items
        observed = cat.overlap_rate(rates, n_administered)
        assert observed == pytest.approx(expected)

    def test_overlap_rate_with_variance(self) -> None:
        """Unequal exposure rates add the (N / Q) * variance term."""
        rates = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
        n_administered = 2
        n_items = len(rates)
        variance = np.var(rates)
        expected = (n_items / n_administered) * variance + (n_administered / n_items)
        observed = cat.overlap_rate(rates, n_administered)
        assert observed == pytest.approx(expected)

    def test_overlap_rate_minimum(self) -> None:
        """Ten items at a uniform rate of 0.5 with a test size of 5 give 0.5."""
        rates = np.array([0.5] * 10)
        n_administered = 5
        observed = cat.overlap_rate(rates, n_administered)
        # Zero variance leaves only Q / N == 5 / 10.
        assert observed == pytest.approx(0.5)

    def test_overlap_rate_invalid_exposure_negative(self) -> None:
        """A negative exposure rate raises ValueError ('between 0 and 1')."""
        rates = np.array([-0.1, 0.5, 0.5])
        with pytest.raises(ValueError, match="between 0 and 1"):
            cat.overlap_rate(rates, 2)

    def test_overlap_rate_invalid_exposure_greater_than_one(self) -> None:
        """An exposure rate above 1 raises ValueError ('between 0 and 1')."""
        rates = np.array([0.5, 1.5, 0.5])
        with pytest.raises(ValueError, match="between 0 and 1"):
            cat.overlap_rate(rates, 2)

    def test_overlap_rate_invalid_test_size_zero(self) -> None:
        """A test size of 0 raises ValueError ('Test size must be positive')."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="Test size must be positive"):
            cat.overlap_rate(rates, 0)

    def test_overlap_rate_invalid_test_size_negative(self) -> None:
        """A negative test size raises ValueError ('Test size must be positive')."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="Test size must be positive"):
            cat.overlap_rate(rates, -1)

    def test_overlap_rate_test_size_larger_than_bank(self) -> None:
        """A test size above the number of exposure rates raises ValueError."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="cannot be larger than bank size"):
            cat.overlap_rate(rates, 10)
232+
233+
234+
class TestRandomResponseVector:
    """Checks for cat.random_response_vector(): length, element type, spread."""

    def test_random_response_vector_size(self) -> None:
        """The returned vector has exactly the requested length."""
        requested = 10
        responses = cat.random_response_vector(requested)
        assert len(responses) == requested

    def test_random_response_vector_empty(self) -> None:
        """Requesting zero responses returns an empty list."""
        responses = cat.random_response_vector(0)
        assert responses == []

    def test_random_response_vector_contains_booleans(self) -> None:
        """Every element of the returned vector is a bool."""
        responses = cat.random_response_vector(20)
        assert all(isinstance(answer, bool) for answer in responses)

    def test_random_response_vector_randomness(self) -> None:
        """A 100-element vector contains both True and False."""
        responses = cat.random_response_vector(100)
        # Both outcomes are expected to show up in a sample of this size.
        assert True in responses
        assert False in responses

    def test_random_response_vector_large_size(self) -> None:
        """A 1000-element vector has the right length and 40-60% True values."""
        requested = 1000
        responses = cat.random_response_vector(requested)
        assert len(responses) == requested
        # Roughly half should be True; the 400-600 band leaves room for
        # statistical variation.
        n_true = sum(responses)
        assert 400 <= n_true <= 600

0 commit comments

Comments
 (0)