|
| 1 | +"""Tests for catsim.cat module.""" |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pytest |
| 5 | + |
| 6 | +from catsim import cat |
| 7 | +from catsim.item_bank import ItemBank |
| 8 | + |
| 9 | + |
class TestDodd:
    """Unit tests covering the dodd() ability-estimation heuristic."""

    def test_dodd_correct_response(self) -> None:
        """A correct answer must push the ability estimate upward."""
        bank = ItemBank.generate_item_bank(10)
        initial = 0.0
        updated = cat.dodd(initial, bank, correct=True)
        # The heuristic moves theta halfway toward the hardest item.
        assert updated > initial

    def test_dodd_incorrect_response(self) -> None:
        """An incorrect answer must push the ability estimate downward."""
        bank = ItemBank.generate_item_bank(10)
        initial = 0.0
        updated = cat.dodd(initial, bank, correct=False)
        # The heuristic moves theta halfway toward the easiest item.
        assert updated < initial

    def test_dodd_correct_formula(self) -> None:
        """The update must match Dodd's closed-form expression."""
        bank = ItemBank.generate_item_bank(10)
        ability = 0.5
        difficulties = bank.difficulty
        hardest = max(difficulties)
        easiest = min(difficulties)

        # Upward step: theta + (b_max - theta) / 2
        expected_up = ability + ((hardest - ability) / 2)
        assert cat.dodd(ability, bank, correct=True) == pytest.approx(expected_up)

        # Downward step: theta - (theta - b_min) / 2
        expected_down = ability - ((ability - easiest) / 2)
        assert cat.dodd(ability, bank, correct=False) == pytest.approx(expected_down)

    def test_dodd_at_max_difficulty(self) -> None:
        """Theta already at the hardest item stays put on a correct answer."""
        bank = ItemBank.generate_item_bank(10)
        hardest = max(bank.difficulty)
        # (b_max - theta) / 2 is zero here, so no movement is expected.
        assert cat.dodd(hardest, bank, correct=True) == pytest.approx(hardest)

    def test_dodd_at_min_difficulty(self) -> None:
        """Theta already at the easiest item stays put on an incorrect answer."""
        bank = ItemBank.generate_item_bank(10)
        easiest = min(bank.difficulty)
        # (theta - b_min) / 2 is zero here, so no movement is expected.
        assert cat.dodd(easiest, bank, correct=False) == pytest.approx(easiest)
| 60 | + |
| 61 | + |
class TestBias:
    """Unit tests for the bias() estimation-error metric."""

    def test_bias_zero(self) -> None:
        """Identical actual and predicted values give zero bias."""
        truth = [1.0, 2.0, 3.0]
        estimate = [1.0, 2.0, 3.0]
        assert cat.bias(truth, estimate) == pytest.approx(0.0)

    def test_bias_positive(self) -> None:
        """Systematic overestimation yields a positive bias."""
        truth = [0.0, 0.0, 0.0]
        estimate = [1.0, 1.0, 1.0]
        assert cat.bias(truth, estimate) == pytest.approx(1.0)

    def test_bias_negative(self) -> None:
        """Systematic underestimation yields a negative bias."""
        truth = [1.0, 1.0, 1.0]
        estimate = [0.0, 0.0, 0.0]
        assert cat.bias(truth, estimate) == pytest.approx(-1.0)

    def test_bias_mixed(self) -> None:
        """Balanced over- and underestimation cancels to zero."""
        truth = [0.0, 0.0, 0.0, 0.0]
        estimate = [1.0, -1.0, 2.0, -2.0]
        # Mean difference: (1 - 1 + 2 - 2) / 4 = 0
        assert cat.bias(truth, estimate) == pytest.approx(0.0)

    def test_bias_with_numpy_arrays(self) -> None:
        """numpy array inputs are accepted alongside plain lists."""
        truth = np.array([1.0, 2.0, 3.0])
        estimate = np.array([1.5, 2.5, 3.5])
        assert cat.bias(truth, estimate) == pytest.approx(0.5)

    def test_bias_different_sizes_raises(self) -> None:
        """Mismatched input lengths must raise ValueError."""
        with pytest.raises(ValueError, match="same size"):
            cat.bias([1.0, 2.0, 3.0], [1.0, 2.0])
| 102 | + |
| 103 | + |
class TestMse:
    """Unit tests for the mse() estimation-error metric."""

    def test_mse_zero(self) -> None:
        """Identical actual and predicted values give zero MSE."""
        truth = [1.0, 2.0, 3.0]
        estimate = [1.0, 2.0, 3.0]
        assert cat.mse(truth, estimate) == pytest.approx(0.0)

    def test_mse_positive(self) -> None:
        """MSE is the mean of the squared differences."""
        truth = [0.0, 0.0, 0.0]
        estimate = [1.0, 2.0, 3.0]
        # (1 + 4 + 9) / 3 = 14/3
        assert cat.mse(truth, estimate) == pytest.approx(14.0 / 3.0)

    def test_mse_negative_differences(self) -> None:
        """Squaring makes the sign of the differences irrelevant."""
        truth = [1.0, 2.0, 3.0]
        estimate = [0.0, 0.0, 0.0]
        # Mirrors test_mse_positive with the arguments reversed.
        assert cat.mse(truth, estimate) == pytest.approx(14.0 / 3.0)

    def test_mse_with_numpy_arrays(self) -> None:
        """numpy array inputs are accepted alongside plain lists."""
        truth = np.array([0.0, 0.0])
        estimate = np.array([1.0, 1.0])
        assert cat.mse(truth, estimate) == pytest.approx(1.0)

    def test_mse_different_sizes_raises(self) -> None:
        """Mismatched input lengths must raise ValueError."""
        with pytest.raises(ValueError, match="same size"):
            cat.mse([1.0, 2.0, 3.0], [1.0, 2.0])
| 139 | + |
| 140 | + |
class TestRmse:
    """Unit tests for the rmse() estimation-error metric."""

    def test_rmse_zero(self) -> None:
        """Identical actual and predicted values give zero RMSE."""
        truth = [1.0, 2.0, 3.0]
        estimate = [1.0, 2.0, 3.0]
        assert cat.rmse(truth, estimate) == pytest.approx(0.0)

    def test_rmse_is_sqrt_of_mse(self) -> None:
        """RMSE must be the square root of MSE on the same data."""
        truth = [0.0, 0.0, 0.0]
        estimate = [1.0, 2.0, 3.0]
        squared_error = cat.mse(truth, estimate)
        root_error = cat.rmse(truth, estimate)
        assert root_error == pytest.approx(np.sqrt(squared_error))

    def test_rmse_unit_difference(self) -> None:
        """Uniform unit differences give an RMSE of exactly one."""
        truth = [0.0, 0.0]
        estimate = [1.0, 1.0]
        # sqrt((1 + 1) / 2) = 1
        assert cat.rmse(truth, estimate) == pytest.approx(1.0)

    def test_rmse_different_sizes_raises(self) -> None:
        """Mismatched input lengths must raise ValueError."""
        with pytest.raises(ValueError, match="same size"):
            cat.rmse([1.0, 2.0, 3.0], [1.0, 2.0])
| 171 | + |
| 172 | + |
class TestOverlapRate:
    """Unit tests for the overlap_rate() exposure statistic."""

    def test_overlap_rate_uniform_exposure(self) -> None:
        """Zero exposure variance reduces the formula to Q/N."""
        rates = np.array([0.3, 0.3, 0.3, 0.3, 0.3])
        q, n = 3, 5
        # T = (N/Q) * var + (Q/N); var is 0 here, so T = 3/5.
        assert cat.overlap_rate(rates, q) == pytest.approx(q / n)

    def test_overlap_rate_with_variance(self) -> None:
        """Non-uniform exposure follows the full T = (N/Q)*var + Q/N formula."""
        rates = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
        q, n = 2, 5
        expected = (n / q) * np.var(rates) + (q / n)
        assert cat.overlap_rate(rates, q) == pytest.approx(expected)

    def test_overlap_rate_minimum(self) -> None:
        """Uniform exposure attains the theoretical minimum overlap Q/N."""
        rates = np.array([0.5] * 10)
        # 5 items drawn from a bank of 10 -> minimum overlap of 0.5.
        assert cat.overlap_rate(rates, 5) == pytest.approx(0.5)

    def test_overlap_rate_invalid_exposure_negative(self) -> None:
        """A negative exposure rate must be rejected."""
        rates = np.array([-0.1, 0.5, 0.5])
        with pytest.raises(ValueError, match="between 0 and 1"):
            cat.overlap_rate(rates, 2)

    def test_overlap_rate_invalid_exposure_greater_than_one(self) -> None:
        """An exposure rate above one must be rejected."""
        rates = np.array([0.5, 1.5, 0.5])
        with pytest.raises(ValueError, match="between 0 and 1"):
            cat.overlap_rate(rates, 2)

    def test_overlap_rate_invalid_test_size_zero(self) -> None:
        """A zero test size must be rejected."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="Test size must be positive"):
            cat.overlap_rate(rates, 0)

    def test_overlap_rate_invalid_test_size_negative(self) -> None:
        """A negative test size must be rejected."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="Test size must be positive"):
            cat.overlap_rate(rates, -1)

    def test_overlap_rate_test_size_larger_than_bank(self) -> None:
        """A test size exceeding the bank size must be rejected."""
        rates = np.array([0.5, 0.5, 0.5])
        with pytest.raises(ValueError, match="cannot be larger than bank size"):
            cat.overlap_rate(rates, 10)
| 232 | + |
| 233 | + |
class TestRandomResponseVector:
    """Unit tests for the random_response_vector() generator."""

    def test_random_response_vector_size(self) -> None:
        """The generated vector has exactly the requested length."""
        assert len(cat.random_response_vector(10)) == 10

    def test_random_response_vector_empty(self) -> None:
        """Requesting zero responses yields an empty list."""
        assert cat.random_response_vector(0) == []

    def test_random_response_vector_contains_booleans(self) -> None:
        """Every generated element is a genuine bool."""
        for response in cat.random_response_vector(20):
            assert isinstance(response, bool)

    def test_random_response_vector_randomness(self) -> None:
        """Both outcomes appear in a reasonably large sample."""
        # 100 draws makes an all-True or all-False run astronomically unlikely.
        sample = cat.random_response_vector(100)
        assert True in sample
        assert False in sample

    def test_random_response_vector_large_size(self) -> None:
        """A large sample is roughly balanced between True and False."""
        count = 1000
        sample = cat.random_response_vector(count)
        assert len(sample) == count
        # Expect ~50% True; the 40%-60% band absorbs statistical noise.
        successes = sum(sample)
        assert 400 <= successes <= 600
0 commit comments