Commit 4f24fce

committed
Pickles, argument parsing and README.
1 parent 4613d70 commit 4f24fce

18 files changed, +1328 −111 lines changed

README.md

+50-18
@@ -1,23 +1,22 @@
 # Reinforcement Learning
-Plays the Q*bert game by reinforcement learning. This is assignment 3 of the ECSE-526 class, as described [here](http://www.cim.mcgill.ca/~jer/courses/ai/assignments/as3.html).
+Plays the Qbert game by reinforcement learning. This is assignment 3 of the ECSE-526 class, as described [here](http://www.cim.mcgill.ca/~jer/courses/ai/assignments/as3.html).
 
 ## Installation
 
 ### Library Dependencies
 
-## Usage
+The main dependency of the program is `numpy`, which can be installed via `pip`. For plotting, `matplotlib` is used.
 
-### Example Commands
+## Usage
 
-Here are some examples of running the program:
+To run the program, use the command-line interface in `main.py`, as follows:
 
 ```
-
+python main.py
 ```
 
-### Help
 
-To run the program, use the command-line interface in `main.py`. To see the list of available commands, run the following:
+To see the list of available commands, run the following:
 
 ```
 python main.py --help
@@ -26,25 +25,58 @@ python main.py --help
 This will print the following:
 
 ```
-
+usage: main.py [-h] [-l {info,debug,critical,warn,error}] [-e NUM_EPISODES]
+               [-o LOAD_LEARNING_FILENAME] [-f SAVE_LEARNING_FILENAME]
+               [-p PLOT_FILENAME] [-c CSV_FILENAME] [-d DISPLAY_SCREEN]
+               [-s {simple,verbose}]
+               [-a {block,enemy,friendly,subsumption,combined_verbose}]
+               [-x {random,optimistic,combined}]
+               [-m {manhattan,hamming,same_result}] [-r RANDOM_SEED]
+               [-i SHOW_IMAGE]
+
+Reinforcement Learning with Qbert.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -l {info,debug,critical,warn,error}, --logging_level {info,debug,critical,warn,error}
+                        The logging level.
+  -e NUM_EPISODES, --num_episodes NUM_EPISODES
+                        The number of training episodes.
+  -o LOAD_LEARNING_FILENAME, --load_learning_filename LOAD_LEARNING_FILENAME
+                        The pickle file to load learning data from. To run the
+                        agent with pre-trained Q data, set this parameter to
+                        'data'
+  -f SAVE_LEARNING_FILENAME, --save_learning_filename SAVE_LEARNING_FILENAME
+                        The pickle file to save learning data to.
+  -p PLOT_FILENAME, --plot_filename PLOT_FILENAME
+                        The filename to save a score plot to.
+  -c CSV_FILENAME, --csv_filename CSV_FILENAME
+                        The filename to save a score CSV file to.
+  -d DISPLAY_SCREEN, --display_screen DISPLAY_SCREEN
+                        Whether to display the ALE screen.
+  -s {simple,verbose}, --state_representation {simple,verbose}
+                        The state representation to use.
+  -a {block,enemy,friendly,subsumption,combined_verbose}, --agent_type {block,enemy,friendly,subsumption,combined_verbose}
+                        The agent type to use.
+  -x {random,optimistic,combined}, --exploration {random,optimistic,combined}
+                        The exploration mode to use.
+  -m {manhattan,hamming,same_result}, --distance_metric {manhattan,hamming,same_result}
+                        The distance metric to use.
+  -r RANDOM_SEED, --random_seed RANDOM_SEED
+                        The random seed to use.
+  -i SHOW_IMAGE, --show_image SHOW_IMAGE
+                        Whether to show a screenshot at the end of every
+                        episode.
 ```
 
 ### Default Values
 
-Here are the default values for all the optional arguments:
-
-Argument | Default Value
---- | ---
-`--a` | `a`
-`--b` | `b`
-`--c` | `c/`
-`--d` | d
-`--e` | `e`
-`--f` | f
+The default values of all the parameters can be found in the `main.py` file.
 
 
 ## Code Organization
 
+The bulk of the code can be found in the `actions.py`, `agent.py`, `learner.py` and `world.py` files.
 
 ## Report
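As the README's "Default Values" section says, the defaults live in `main.py`. A minimal sketch of how those defaults can be inspected programmatically, using only a subset of the real flags (the full parser defines more arguments), is `argparse`'s `get_default`:

```python
from argparse import ArgumentParser

# Rebuild a subset of the parser from main.py; only two of the real
# arguments are reproduced here for illustration.
parser = ArgumentParser(description='Reinforcement Learning with Qbert.')
parser.add_argument('-e', '--num_episodes', default=100, type=int)
parser.add_argument('-x', '--exploration', default='combined',
                    choices=['random', 'optimistic', 'combined'])

# get_default() looks a default up by destination name without parsing argv.
print(parser.get_default('num_episodes'))  # 100
print(parser.get_default('exploration'))   # combined
```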

agent.py

-1
@@ -46,7 +46,6 @@ def __init__(self, agent_type='subsumption', random_seed=123, frame_skip=4, repe
             self.agent = QbertCombinedVerboseAgent(random_seed, frame_skip, repeat_action_probability, sound,
                                                    display_screen, alpha, gamma, epsilon, unexplored_threshold,
                                                    unexplored_reward, exploration, distance_metric)
-
         self.world = self.agent.world
 
     def action(self):

main.py

+44-91
@@ -21,6 +21,9 @@ def play_learning_agent(num_episodes=2, show_image=False, load_learning_filename
                         save_learning_filename=None, plot_filename=None, csv_filename=None, display_screen=False,
                         state_representation='simple', agent_type='subsumption', exploration=None,
                         distance_metric=None, random_seed=123):
+    """
+    Let the learning agent play with the specified parameters.
+    """
     logging.info('Plot filename: {}'.format(plot_filename))
     logging.info('Agent type: {}'.format(agent_type))
     logging.info('Distance metric: {}'.format(distance_metric))
@@ -68,105 +71,55 @@ def setup_logging(level):
                         datefmt='%d-%m-%Y:%H:%M:%S',
                         level=LOGGING_LEVELS[level])
 
-
 def parse_command_line_arguments():
     """
     Parse the command-line arguments provided by the user.
     """
-    parser = ArgumentParser(description='Reinforcement Learning with Q*bert.')
+    parser = ArgumentParser(description='Reinforcement Learning with Qbert.')
     parser.add_argument('-l', '--logging_level', default='info', choices=LOGGING_LEVELS.keys(),
                         help='The logging level.')
-
-    subparsers = parser.add_subparsers()
-
-    args = parser.parse_args()
+    parser.add_argument('-e', '--num_episodes', default=100, type=int, help='The number of training episodes.')
+    parser.add_argument('-o', '--load_learning_filename', default=None,
+                        help="The pickle file to load learning data from. To run the agent with pre-trained Q data, set"
+                             " this parameter to 'data'")
+    parser.add_argument('-f', '--save_learning_filename', default=None,
+                        help='The pickle file to save learning data to.')
+    parser.add_argument('-p', '--plot_filename', default=None,
+                        help='The filename to save a score plot to.')
+    parser.add_argument('-c', '--csv_filename', default=None,
+                        help='The filename to save a score CSV file to.')
+    parser.add_argument('-d', '--display_screen', default=False, type=bool,
+                        help='Whether to display the ALE screen.')
+    parser.add_argument('-s', '--state_representation', default='simple', choices=['simple', 'verbose'],
+                        help='The state representation to use.')
+    parser.add_argument('-a', '--agent_type', default='subsumption',
+                        choices=['block', 'enemy', 'friendly', 'subsumption', 'combined_verbose'],
+                        help='The agent type to use.')
+    parser.add_argument('-x', '--exploration', default='combined', choices=['random', 'optimistic', 'combined'],
+                        help='The exploration mode to use.')
+    parser.add_argument('-m', '--distance_metric', default=None, choices=['manhattan', 'hamming', 'same_result'],
+                        help='The distance metric to use.')
+    parser.add_argument('-r', '--random_seed', default=None, type=int,
+                        help='The random seed to use.')
+    parser.add_argument('-i', '--show_image', default=False, type=bool,
+                        help='Whether to show a screenshot at the end of every episode.')
+
+    args = parser.parse_args()
     setup_logging(args.logging_level)
-    args.func(args)
-
-
-def save_generalization_results():
-    distance_metric = 'no_generalization'
-    play_learning_agent(num_episodes=100, plot_filename=distance_metric, csv_filename=distance_metric,
-                        display_screen=False, agent_type='combined_verbose', exploration=None, distance_metric=None)
-
-    distance_metric = 'manhattan'
-    play_learning_agent(num_episodes=100, plot_filename=distance_metric, csv_filename=distance_metric,
-                        display_screen=False, agent_type='combined_verbose', exploration=None,
-                        distance_metric=distance_metric)
-
-    distance_metric = 'hamming'
-    play_learning_agent(num_episodes=100, plot_filename=distance_metric, csv_filename=distance_metric,
-                        display_screen=False, agent_type='combined_verbose', exploration=None,
-                        distance_metric=distance_metric)
-
-    distance_metric = 'same_result'
-    play_learning_agent(num_episodes=100, plot_filename=distance_metric, csv_filename=distance_metric,
-                        display_screen=False, agent_type='combined_verbose', exploration=None,
-                        distance_metric=distance_metric)
-
-    filename = 'subsumption_generalization'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration=None,
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_no_exploration')
-
-
-def save_exploration_results():
-    filename = 'subsumption_random'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='random',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_random')
-
-    filename = 'subsumption_optimistic'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='optimistic',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_optimistic')
-
-    filename = 'subsumption_combined'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='combined',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_combined')
-
-
-def save_performance_results():
-    filename = 'seed123'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='combined',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_combined_123',
-                        random_seed=123)
-
-    filename = 'seed459'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='combined',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_combined_459',
-                        random_seed=459)
-
-    filename = 'seed598'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='combined',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_combined_598',
-                        random_seed=459)
-
-
-def continued_learning():
-    filename = 'seed459_600'
-    play_learning_agent(num_episodes=100, plot_filename=filename, csv_filename=filename,
-                        display_screen=False, agent_type='subsumption', exploration='combined',
-                        distance_metric=None, save_learning_filename='subsumption_dangerous_combined_459_600',
-                        random_seed=459, load_learning_filename='subsumption_dangerous_combined_459_500')
-
-
-def sample_play():
-    play_learning_agent(num_episodes=100,
-                        display_screen=True, agent_type='subsumption', exploration='combined',
-                        distance_metric=None,
-                        random_seed=459, load_learning_filename='subsumption_dangerous_combined_459_400')
+    play_learning_agent(num_episodes=args.num_episodes,
+                        load_learning_filename=args.load_learning_filename,
+                        save_learning_filename=args.save_learning_filename,
+                        plot_filename=args.plot_filename,
+                        csv_filename=args.csv_filename,
+                        display_screen=args.display_screen,
+                        state_representation=args.state_representation,
+                        agent_type=args.agent_type,
+                        exploration=args.exploration,
+                        distance_metric=args.distance_metric,
+                        random_seed=args.random_seed,
+                        show_image=args.show_image)
 
 
 if __name__ == '__main__':
     setup_logging('info')
-    # play_learning_agent()
-    # save_generalization_results()
-    # save_exploration_results()
-    # save_performance_results()
-    continued_learning()
-    # sample_play()
+    parse_command_line_arguments()
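One caveat about the new argument parsing in main.py (an editorial note, not part of the commit): `--display_screen` and `--show_image` are declared with `type=bool`, and argparse applies `bool()` to the raw string, so any non-empty value, including the string 'False', parses as True. A short sketch of the behavior, alongside the conventional `action='store_true'` flag style:

```python
from argparse import ArgumentParser

# type=bool applies bool() to the raw argument string, so any non-empty
# string (even 'False') becomes True.
p1 = ArgumentParser()
p1.add_argument('-d', '--display_screen', default=False, type=bool)
assert p1.parse_args(['-d', 'False']).display_screen is True  # surprising

# The conventional flag style: present means True, absent means False.
p2 = ArgumentParser()
p2.add_argument('-d', '--display_screen', action='store_true')
assert p2.parse_args([]).display_screen is False
assert p2.parse_args(['-d']).display_screen is True
```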

pickle/data_block_N.pkl (60.1 KB): Binary file not shown.

pickle/data_block_Q.pkl (77 KB): Binary file not shown.

pickle/data_enemy_N.pkl (4.38 KB): Binary file not shown.

pickle/data_enemy_Q.pkl (5.79 KB): Binary file not shown.

pickle/data_friendly_N.pkl (1.28 KB): Binary file not shown.

pickle/data_friendly_Q.pkl (1.75 KB): Binary file not shown.

Six further binary files not shown.
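The commit message mentions pickles, and the `.pkl` files above pair an N (visit-count) table with a Q (value) table per agent. Their exact contents are not visible in this diff; a hypothetical round-trip, assuming plain dicts keyed by (state, action), would look like:

```python
import os
import pickle
import tempfile

# Hypothetical Q and N tables; the real structure of the .pkl files in this
# commit is not shown, so dicts keyed by (state, action) are assumed.
Q = {('state_a', 'up'): 1.5, ('state_a', 'down'): -0.25}
N = {('state_a', 'up'): 12, ('state_a', 'down'): 3}

path = os.path.join(tempfile.gettempdir(), 'data_Q.pkl')
with open(path, 'wb') as f:
    pickle.dump(Q, f)          # save learning data
with open(path, 'rb') as f:
    restored = pickle.load(f)  # load learning data

assert restored == Q
```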

plotter.py

-1
@@ -21,7 +21,6 @@ def plot_scores(scores, filename):
     x_smooth = np.linspace(1, len(scores), 200)
     y_smooth = spline(x_points, y_points, x_smooth)
 
-    # plt.plot(x_points, y_points, 'o', label='Data')
     plt.plot(x_smooth, y_smooth, 'C0', label='Score')
     plt.xlabel('Number of episodes')
     plt.ylabel('Score')
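A note on plotter.py: `scipy.interpolate.spline`, used above for smoothing, was deprecated and later removed from SciPy (in 1.3). On a recent SciPy, `make_interp_spline` gives an equivalent smoothed curve; a sketch with hypothetical scores:

```python
import numpy as np
from scipy.interpolate import make_interp_spline

# Hypothetical per-episode scores; plotter.py receives these as `scores`.
scores = [10, 40, 25, 60, 80, 55, 90]
x_points = np.arange(1, len(scores) + 1)
y_points = np.array(scores, dtype=float)

# Same smoothing as spline(x_points, y_points, x_smooth), but via the
# modern API: build a cubic B-spline, then evaluate it on the dense grid.
x_smooth = np.linspace(1, len(scores), 200)
y_smooth = make_interp_spline(x_points, y_points, k=3)(x_smooth)

assert y_smooth.shape == (200,)
```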
