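Changed the saver to checkpoint only the trainable variables in the global scope. Added graph visualization to TensorBoard. Also retuned constants: MAX_EP_LENGTH, CLIP_VALUE, MAX_MODELS, CNT_FRAMES, ENTROPY_MODIFIER, and the policy layer's initializer stddev.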

NikolaMr · NikolaMr · commit 91b3cc79a9b1 · 2018-06-09T08:49:24.000+02:00
diff --git a/Asynchronous/A3C.py b/Asynchronous/A3C.py
@@ -18,10 +18,10 @@
 ENV_NAME = 'BreakoutDeterministic-v4'
 #ENV_NAME = 'PongDeterministic-v4'
 #MAX_ITERATIONS = 100000000
-MAX_EP_LENGTH = 1000
+MAX_EP_LENGTH = 100000
 #MAX_LEARNING_TIME = 7 * 60 * 60 # 7 hours
 LEARNING_RATE = 1e-4
-CLIP_VALUE = 10.0
+CLIP_VALUE = 2.0
 
 def process_frame(x_t, img_rows, img_cols):
     x_t = skimage.color.rgb2gray(x_t)
@@ -366,12 +366,13 @@ def global_saving_thread(agent, sess):
 
     global global_counter
 
-    MAX_MODELS = 3
+    MAX_MODELS = 1000
     cnt_model = 0
 
     with sess.as_default(), sess.graph.as_default():
 
-        saver = tf.train.Saver()
+        collection = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GLOBAL_SCOPE)
+        saver = tf.train.Saver(collection)
 
         elapsed_time = time.time() - start_time
 
diff --git a/Asynchronous/A3C_no_lstm.py b/Asynchronous/A3C_no_lstm.py
@@ -8,18 +8,18 @@
 
 IMG_WIDTH = 105
 IMG_HEIGHT = 80
-CNT_FRAMES = 4
+CNT_FRAMES = 3
 GLOBAL_SCOPE = 'global'
 VALUE_MODIFIER = 0.5*1e0
 POLICY_MODIFIER = 1*1e0
-ENTROPY_MODIFIER = 5*1e-3#2.5e-5#0.0005
+ENTROPY_MODIFIER = 2.5*1e-1#2.5e-5#0.0005
 MAX_STEPS = 30
 DISCOUNT = 0.99
 ENV_NAME = 'BreakoutDeterministic-v4'
 #ENV_NAME = 'PongDeterministic-v4'
-MAX_EP_LENGTH = 1000
+MAX_EP_LENGTH = 100000
 LEARNING_RATE = 1e-4
-CLIP_VALUE = 10.0
+CLIP_VALUE = 2.0
 DECAY = 0.99
 def process_frame(x_t, img_rows, img_cols):
     x_t = skimage.color.rgb2gray(x_t)
@@ -88,7 +88,7 @@ def __build_model(self):
 
             #normalization = tf.layers.batch_normalization(embedding)
 
-            self.policy = tf.contrib.layers.fully_connected(embedding, self.action_size, activation_fn=tf.nn.softmax, weights_initializer=tf.random_normal_initializer(stddev=0.5), biases_initializer=None,\
+            self.policy = tf.contrib.layers.fully_connected(embedding, self.action_size, activation_fn=tf.nn.softmax, weights_initializer=tf.random_normal_initializer(stddev=0.05), biases_initializer=None,\
                                                            scope='fc_policy')
             self.value = tf.contrib.layers.fully_connected(\
                                                            embedding, \
diff --git a/Asynchronous/start_tensorboard.sh b/Asynchronous/start_tensorboard.sh
@@ -1 +1 @@
-tensorboard --logdir=local0:'./local0',local1:'./local1',local2:'./local2',local3:'./local3',local4:'./local4',local5:'./local5'
+tensorboard --logdir=local0:'./local0',local1:'./local1',local2:'./local2',local3:'./local3',local4:'./local4',local5:'./local5',graph:'./graph'

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-tensorboard --logdir=local0:'./local0',local1:'./local1',local2:'./local2',local3:'./local3',local4:'./local4',local5:'./local5'`
	`1`	`+tensorboard --logdir=local0:'./local0',local1:'./local1',local2:'./local2',local3:'./local3',local4:'./local4',local5:'./local5',graph:'./graph'`