Commit 135dfbc

Overhaul generate.py
1 parent: b63d97c

6 files changed: +71 -90 lines

generate.py (+20 -82)

@@ -1,85 +1,25 @@
-import argparse, pathlib, uuid, subprocess
+import argparse, uuid, subprocess
+import torch
 from model import MusicTransformer
 from preprocess import SequenceEncoder
-import torch
-import torch.nn.functional as F
-import numpy as np
-from helpers import one_hot
-from pretty_midi import PrettyMIDI, Instrument
+from helpers import sample, write_midi
 import midi_input
-import pdb
+import yaml
 
 class GeneratorError(Exception):
     pass
 
-def write_midi(note_sequence, output_dir, filename):
-
-    #make output directory
-    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
-
-    #generate midi
-    midi = PrettyMIDI()
-    piano_track = Instrument(program=0, is_drum=False, name=filename)
-    piano_track.notes = note_sequence
-    midi.instruments.append(piano_track)
-    output_name = output_dir + f"{filename}.midi"
-    midi.write(output_name)
-
-def sample(model, sample_length, prime_sequence=[], temperature=1,
-        topk=None):
-    """
-    Generate a MIDI event sequence of a fixed length by randomly sampling from a model's distribution of sequences. Optionally, "seed" the sequence with a
-    prime. A well-trained model will create music that responds to the prime
-    and develops upon it.
-    """
-    #deactivate training mode
-    model.eval()
-    if len(prime_sequence) == 0:
-        #if no prime is provided, randomly select a starting event
-        input_sequence = [np.random.randint(model.n_tokens)]
-    else:
-        input_sequence = prime_sequence
-
-    for i in range(sample_length):
-        if torch.cuda.is_available():
-            input_tensor = torch.LongTensor(input_sequence).cuda()
-        else:
-            input_tensor = torch.LongTensor(input_sequence)
-        #add singleton dimension for the batch
-        input_tensor = input_tensor.unsqueeze(0)
-        out = model(input_tensor)
-        probs = F.softmax(out / temperature, dim=-1)
-        #keep the probability distribution for the *next* state only
-        probs = probs[:, -1, :]
-
-        if topk is not None:
-            #sample from only the top k most probable states
-            values, indices = probs.topk(topk)
-            if torch.cuda.is_available():
-                zeros = torch.zeros(model.n_tokens).cuda()
-            else:
-                zeros = torch.zeros(model.n_tokens)
-            probs = torch.scatter(zeros, 0, indices, values)
-
-        next_char_ix = torch.multinomial(probs,1).item()
-
-        input_sequence.append(next_char_ix)
-
-    return input_sequence
-
 def main():
     parser = argparse.ArgumentParser("Script to generate MIDI tracks by sampling from a trained model.")
 
-    # parser.add_argument("--model_key", type=str,
-    #         help="key to MODEL_DICT, allowing access to the path of a saved model & its params")
+    parser.add_argument("--model_key", type=str,
+            help="Key in saved_models/model.yaml, helps look up model arguments and path to saved checkpoint.")
     parser.add_argument("--sample_length", type=int, default=512,
            help="number of events to generate")
     parser.add_argument("--temps", nargs="+", type=float,
            default=[1.0],
            help="space-separated list of temperatures to use when sampling")
-    parser.add_argument("--topks", nargs="+", type=int,
-            help="space-separated list of topks to use when sampling")
-    parser.add_argument("--n_trials", type=int, default=5,
+    parser.add_argument("--n_trials", type=int, default=3,
            help="number of MIDI samples to generate per experiment")
     parser.add_argument("--live_input", action='store_true', default = False,
            help="if true, take in a seed from a MIDI input controller")
@@ -91,13 +31,15 @@ def main():
 
     args=parser.parse_args()
 
-    # model_key = args.model_key
-    # if MODEL_DICT.get(model_key) is None:
-    #     raise GeneratorError("model key not supplied or not recognized!")
-    model_path = "saved_models/tf_20200124"
-    model_key = "tf_20200124"
-    model_args = {"n_states": 413, "d_model": 64,
-            "dim_feedforward": 512, "n_heads": 4, "n_layers": 3}
+    model_key = args.model_key
+
+    try:
+        model_dict = yaml.safe_load(open('saved_models/model.yaml'))[model_key]
+    except:
+        raise GeneratorError(f"could not find yaml information for key {model_key}")
+
+    model_path = model_dict["path"]
+    model_args = model_dict["args"]
     try:
         state = torch.load(model_path)
     except RuntimeError:
@@ -106,7 +48,8 @@ def main():
     n_velocity_events = 32
     n_time_shift_events = 125
 
-    decoder = SequenceEncoder(n_time_shift_events, n_velocity_events)
+    decoder = SequenceEncoder(n_time_shift_events, n_velocity_events,
+            min_events=0)
 
     if args.live_input:
         print("Expecting a midi input...")
@@ -117,14 +60,10 @@ def main():
        prime_sequence = []
 
    model = MusicTransformer(**model_args)
-    model.load_state_dict(state)
+    model.load_state_dict(state, strict=False)
 
    temps = args.temps
 
-    topks = args.topks
-    if topks is None:
-        topks = [None]
-
    trial_key = str(uuid.uuid4())[:6]
    n_trials = args.n_trials
 
@@ -136,8 +75,7 @@ def main():
        note_sequence = []
        for i in range(n_trials):
            print("generating sequence")
-            output_sequence = sample(model, prime_sequence = prime_sequence,
-                    sample_length=args.sample_length, temperature=temp)
+            output_sequence = sample(model, prime_sequence = prime_sequence, sample_length=args.sample_length, temperature=temp)
            note_sequence = decoder.decode_sequence(output_sequence,
                verbose=True, stuck_note_duration=None)
 
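The new --model_key lookup assumes a saved_models/model.yaml file that maps each key to a checkpoint path and the keyword arguments for MusicTransformer. The file itself is not part of this diff; a minimal entry consistent with the code above, reusing the hard-coded values the commit removes, might look like:

tf_20200124:
  path: saved_models/tf_20200124
  args:
    n_states: 413
    d_model: 64
    dim_feedforward: 512
    n_heads: 4
    n_layers: 3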

helpers.py (+45 -2)

@@ -1,8 +1,9 @@
 import torch
 import numpy as np
-from pretty_midi import Note
+from pretty_midi import Note, PrettyMIDI, Instrument
 import torch.nn.functional as F
-import copy
+import copy, pathlib
+import pdb
 
 def vectorize(sequence):
     """
@@ -66,3 +67,45 @@ def d(tensor=None):
     if tensor is None:
         return 'cuda' if torch.cuda.is_available() else 'cpu'
     return 'cuda' if tensor.is_cuda else 'cpu'
+
+def write_midi(note_sequence, output_dir, filename):
+
+    #make output directory
+    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+    #generate midi
+    midi = PrettyMIDI()
+    piano_track = Instrument(program=0, is_drum=False, name=filename)
+    piano_track.notes = note_sequence
+    midi.instruments.append(piano_track)
+    output_name = output_dir + f"{filename}.midi"
+    midi.write(output_name)
+
+def sample(model, sample_length, prime_sequence=[], temperature=1):
+    """
+    Generate a MIDI event sequence of a fixed length by randomly sampling from a model's distribution of sequences. Optionally, "seed" the sequence with a prime. A well-trained model will create music that responds to the prime and develops upon it.
+    """
+    #deactivate training mode
+    model.eval()
+    if len(prime_sequence) == 0:
+        #if no prime is provided, randomly select a starting event
+        input_sequence = [np.random.randint(model.n_tokens)]
+    else:
+        input_sequence = prime_sequence.copy()
+
+    #add singleton dimension for the batch
+    input_tensor = torch.LongTensor(input_sequence).unsqueeze(0)
+
+    for i in range(sample_length):
+        #select probabilities of *next* token
+        out = model(input_tensor)[0, -1, :]
+        #out is a 1d tensor of shape (n_tokens)
+        probs = F.softmax(out / temperature, dim=0)
+        #sample prob distribution for next character
+        pdb.set_trace()
+        c = torch.multinomial(probs,1)
+        input_tensor = torch.cat([input_tensor[:,1:], c[None]], dim=1)
+        input_sequence.append(c.item())
+
+    return input_sequence
+
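For reference, a minimal sketch of how the relocated helpers fit together, mirroring what generate.py's main() now does. Paths and values are illustrative, and it assumes the pdb.set_trace() left in sample() is removed so generation runs uninterrupted:

import torch
from model import MusicTransformer
from preprocess import SequenceEncoder
from helpers import sample, write_midi

#constructor arguments as stored under "args" in saved_models/model.yaml
model = MusicTransformer(n_states=413, d_model=64, dim_feedforward=512,
        n_heads=4, n_layers=3)
model.load_state_dict(torch.load("saved_models/tf_20200124"), strict=False)

#generate 512 events, unprimed, at temperature 1.0
event_sequence = sample(model, sample_length=512, prime_sequence=[],
        temperature=1.0)

#decode events into pretty_midi notes and write them to disk;
#write_midi concatenates output_dir and filename directly, so the
#directory needs its trailing slash
decoder = SequenceEncoder(125, 32, min_events=0)
note_sequence = decoder.decode_sequence(event_sequence, verbose=True,
        stuck_note_duration=None)
write_midi(note_sequence, "output/", "demo")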

midi_input.py (+2 -4)

@@ -69,7 +69,7 @@ def read(n_velocity_events=32, n_time_shift_events=125):
         i += 1
 
     note_sequence = quantize(note_sequence, n_velocity_events, n_time_shift_events)
-    #sigh bad practice
+
     note_sequence = vectorize(note_sequence)
     return note_sequence
 
@@ -84,10 +84,8 @@ def quantize(note_sequence, n_velocity_events, n_time_shift_events):
 
         note.velocity = (note.velocity // velocity_step) * velocity_step + 1
 
-        return note_sequence
-
-
 
+    return note_sequence
 
 if __name__ == "__main__":
     read()
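For a concrete sense of the quantization line retained above: assuming velocity_step = 128 // n_velocity_events (so a step of 4 with the default of 32 velocity events; that computation sits outside this diff), each velocity snaps to the bottom of its bucket plus one:

velocity_step = 128 // 32  #assumed step size of 4
for v in (1, 37, 64, 100, 127):
    print(v, "->", (v // velocity_step) * velocity_step + 1)
#1 -> 1, 37 -> 37, 64 -> 65, 100 -> 101, 127 -> 125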

model/attention.py (+2)

@@ -47,6 +47,8 @@ def forward(self, x, mask):
             for w, x in zip(self.linears, (x,x,x))]
         if self.relative_pos:
             #apply same position embeddings across the batch
+            #Is it possible to apply positional self-attention over
+            #only half of all relative distances?
             Er = self.Er[:, embedding_start:, :].unsqueeze(0)
             QEr = torch.matmul(queries, Er.transpose(-1,-2))
             QEr = self._mask_positions(QEr)
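For context on the question added in this comment: QEr is the query-by-relative-position score matrix from Music Transformer-style relative self-attention (Huang et al., 2018), and in a causal decoder only the non-positive half of the relative distances can ever attend, which appears to be what the comment is asking. The standard "skewing" step that aligns QEr[i, j] with relative distance j - i looks roughly like the sketch below; this is the textbook version, not necessarily what this repo's _mask_positions does, since that implementation is outside this diff:

import torch
import torch.nn.functional as F

def skew(qer):
    #qer: (batch, heads, seq_len, seq_len) scores of each query against
    #each learned relative-position embedding
    batch, heads, seq_len, _ = qer.shape
    #pad one column on the left, reshape, and drop the first row so that
    #entry (i, j) ends up aligned with relative distance j - i
    padded = F.pad(qer, (1, 0))                  #(batch, heads, L, L+1)
    reshaped = padded.reshape(batch, heads, seq_len + 1, seq_len)
    return reshaped[:, :, 1:, :]                 #(batch, heads, L, L)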

tests/transformer_test.py (+1 -1)

@@ -3,7 +3,7 @@
 from preprocess import PreprocessingPipeline
 from train import train
 from model import MusicTransformer
-from generate import sample
+from helpers import sample
 
 def main():
 

train.py (+1 -1)

@@ -103,7 +103,7 @@ def train(model, training_data, validation_data,
             averaged_loss = 0
             batch_num += 1
 
-            print(f"epoch: {e+1}/{epochs} | time: {time.time() - batch_start_time:.0f}s")
+            print(f"epoch: {e+1}/{epochs} | time: {(time.time() - batch_start_time) / 60:,.0f}m")
         shuffle(training_data)
 
         if (e + 1) % evaluate_per == 0:
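The new format spec reports elapsed time per epoch in whole minutes with a thousands separator instead of raw seconds, e.g.:

elapsed = 9000.0  #seconds
print(f"epoch: 2/10 | time: {elapsed / 60:,.0f}m")  #epoch: 2/10 | time: 150m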
