@@ -227,9 +227,34 @@ t_propose_action EpsilonGreedyAgent::propose_action() {
227
227
if (rng_.frand () < epsilon_) {
228
228
/* Explore
229
229
* With probability epsilon, choose randomly amongst all move types */
230
+
231
+ // Cumulative epsilon action probability stores a CDF for all available
232
+ // actions where each action has an equal probability to occur. Pick
233
+ // a random number between 0 and 1 and select the action in the CDF equal
234
+ // to or just greater than the random number.
235
+ // For example, for four actions:
236
+ // A B C D
237
+ // [0.25, 0.5, 0.75, 1.0]
238
+ // Here, if the random number is 0.2, action A would be chosen.
239
+ // if the random number is 0.5, action B would be chosen.
240
+ // if the random number is 0.6, action C would be chosen.
241
+ // if the random number is 1.0, action D would be chosen.
230
242
float p = rng_.frand ();
231
243
auto itr = std::lower_bound (cumm_epsilon_action_prob_.begin (), cumm_epsilon_action_prob_.end (), p);
232
- auto action_type_q_pos = itr - cumm_epsilon_action_prob_.begin ();
244
+ size_t action_type_q_pos;
245
+ if (itr != cumm_epsilon_action_prob_.end ()) {
246
+ action_type_q_pos = itr - cumm_epsilon_action_prob_.begin ();
247
+ } else {
248
+ // Due to numerical precision (and dumb luck) it's possible that the
249
+ // CDF does not fully add up to 1.0 (for example 0.9999) and the
250
+ // random number chosen is 1.0. In this case, no action will be
251
+ // chosen. In this case, just choose the last action.
252
+ // For example, for three actions:
253
+ // [0.33, 0.66, 0.99]
254
+ // Notice that the last action does not perfectly add up to 1.0. To
255
+ // get around this, we just pretend that it rounded up to 1.0.
256
+ action_type_q_pos = num_available_actions_ - 1 ;
257
+ }
233
258
// Mark the q_table location that agent used to update its value after processing the move outcome
234
259
last_action_ = action_type_q_pos;
235
260
0 commit comments