merge examples and experiments
thammegowda committed Aug 17, 2021
1 parent 4c71d1b commit 6657b36
Showing 15 changed files with 73 additions and 218 deletions.
83 changes: 0 additions & 83 deletions examples/transformer.base.yml

This file was deleted.

80 changes: 0 additions & 80 deletions examples/transformer.test.yml

This file was deleted.

File renamed without changes.
File renamed without changes.
47 changes: 31 additions & 16 deletions experiments/transformer.base.yml
@@ -17,7 +17,7 @@ optimizer:
- 0.9
- 0.98
eps: 1.0e-09
lr: 0.2
lr: 0.1

schedule:
name: noam
@@ -27,7 +27,7 @@ schedule:
model_dim: 512
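
Note: the noam schedule named above is the inverse-square-root warmup schedule from the Transformer paper. A minimal Python sketch of the formula follows; the constant, warmup, and model_dim defaults are illustrative, not necessarily what this config resolves to.

def noam_lr(step: int, constant: float = 2.0, warmup: int = 8000, model_dim: int = 512) -> float:
    # lr = constant * model_dim^-0.5 * min(step^-0.5, step * warmup^-1.5)
    step = max(step, 1)  # guard against step 0
    return constant * model_dim ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)

# the rate rises linearly until step == warmup, then decays as step^-0.5
print(round(noam_lr(8000), 6))  # peak value, ~0.000988 with these illustrative defaults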

criterion:
name: smooth_kld
name: smooth_kld #options "cross_entropy", "smooth_kld", "binary_cross_entropy", "triplet_loss"
args:
label_smoothing: 0.1
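
Note: smooth_kld is KL divergence against a label-smoothed target distribution. The sketch below illustrates the idea only; it is not RTG's criterion class, and real implementations typically also exclude the padding index.

import torch
import torch.nn.functional as F

def smooth_kld_loss(logits: torch.Tensor, targets: torch.Tensor, smoothing: float = 0.1) -> torch.Tensor:
    # logits: (batch, vocab); targets: (batch,) gold token ids
    vocab = logits.size(-1)
    smooth = torch.full_like(logits, smoothing / (vocab - 1))   # spread mass over non-gold tokens
    smooth.scatter_(1, targets.unsqueeze(1), 1.0 - smoothing)   # keep most mass on the gold token
    return F.kl_div(F.log_softmax(logits, dim=-1), smooth, reduction='batchmean')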

@@ -37,32 +37,47 @@ prep: # data preparation
shared_vocab: true # true means same vocab for src and tgt, false means different vocabs
src_len: 256 # max source length; longer sentences are handled as per 'truncate={true,false}'
tgt_len: 256
train_src: data/train.src # training data
train_tgt: data/train.tgt
truncate: true # what to do with longer sentences: if true, truncate at src_len or tgt_len; if false, filter them out
train_src: experiments/sample-data/sampl.test.fr.tok
train_tgt: experiments/sample-data/sampl.test.en.tok
valid_src: experiments/sample-data/sampl.valid.fr.tok
valid_tgt: experiments/sample-data/sampl.valid.en.tok
valid_src: data/valid.src
valid_tgt: data/valid.tgt
mono_src: [] # monolingual data for learning vocab or BPE
mono_tgt: []
tester:
decoder:
beam_size: 4
batch_size: 12000 # this is for 1 beam; effective_batch_size = batch_size / beam_size
lp_alpha: 0.0 # length penalty
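
Note: per the comment above, the decoder's token budget is divided by the beam width, and lp_alpha scales a length penalty. A quick sketch; the GNMT-style penalty shown is an assumption about the formula, not taken from RTG's code.

batch_size, beam_size = 12000, 4
effective_batch_size = batch_size // beam_size   # 3000 tokens per decode step at beam_size=4

def length_penalty(length: int, alpha: float = 0.6) -> float:
    # GNMT-style penalty; alpha=0.0 makes the penalty 1.0 regardless of length
    return ((5 + length) / 6) ** alpha
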
tune: # If this block is missing, then tuner will not be run, and some default values are picked from the code
trials: 6 # number of random trials, in addition to "suggested" values
tune_src: data/valid.src # dataset for tuning
tune_ref: data/valid.tgt
beam_size: [1, 4, 8] # pool of values for beam_size
ensemble: [1, 5, 10]
lp_alpha: [0.0, 0.4, 0.6, 1.0]
suggested: # list of suggested values for beam_size, ensemble, lp_alpha
- 1, 1, 0.0
- 4, 1, 0.0
- 4, 1, 0.6
- 1, 5, 0.0
- 4, 5, 0.0
- 4, 5, 0.6
- 1, 10, 0.0
- 4, 10, 0.0
- 4, 10, 0.6
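
Note: each suggested entry is a "beam_size, ensemble, lp_alpha" triple. A hypothetical helper (not RTG code) showing how such a string maps to typed values:

def parse_suggested(entry: str):
    beam, ensemble, lp_alpha = (v.strip() for v in entry.split(','))
    return int(beam), int(ensemble), float(lp_alpha)

print(parse_suggested('4, 5, 0.6'))   # (4, 5, 0.6)
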
suit: # suite of tests to run after training
# name of test and list of src.tok, ref files (ref should be unmodified)
valid:
- experiments/sample-data/sampl.valid.fr.tok
- experiments/sample-data/sampl.valid.en # reference, unmodified -- not tokenized
test:
- experiments/sample-data/sampl.test.fr.tok
- experiments/sample-data/sampl.test.en # reference, unmodified -- not tokenized
valid: # name of test and list of src.tok, ref files (ref should be unmodified)
- data/valid.src
- data/valid.tgt
# in case we want to use an external detokenizer; interface: $detokenizer < $out > out.detok
# by default it uses the moses-tokenizer python wrapper around the perl script
# detokenizer: cut -f1 | python -m rtg.tool.unicode_fix -l hi -d | perl scripts/indic-tok.perl -d
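
Note: a hypothetical helper illustrating the detokenizer interface stated above ($detokenizer < $out > out.detok); the function name and file handling are assumptions, not RTG's implementation.

import subprocess
from pathlib import Path

def detokenize(detok_cmd: str, hyp_file: Path) -> Path:
    out_file = hyp_file.parent / (hyp_file.name + '.detok')
    with open(hyp_file) as inp, open(out_file, 'w') as out:
        # run the configured shell pipeline: $detokenizer < $out > out.detok
        subprocess.run(detok_cmd, shell=True, stdin=inp, stdout=out, check=True)
    return out_file
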
trainer:
init_args:
chunk_size: 10 # generation in chunks of time steps to reduce memory consumption
grad_accum: 1 # How many batches to accumulate gradients over
batch_size: 4200 # not exceeding this many tokens (including padding); in tensor2tensor this is the mean batch size
check_point: 1000 # how often to checkpoint?
keep_models: 10 # how many checkpoints to keep on disk (small enough to save disk, large enough for checkpoint averaging)
steps: 200000 # how many steps to train
keep_in_mem: True
updated_at: '2019-03-09T21:15:33.707183'
seed: 12345 # fix the manual seed of pytorch + cuda + numpy + python_stdlib RNGs. Remove/comment this to disable
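
Note: the seed line above pins four RNGs. A minimal sketch of doing the same by hand (illustrative, not RTG's code):

import random
import numpy as np
import torch

def fix_seed(seed: int = 12345):
    random.seed(seed)                 # python stdlib RNG
    np.random.seed(seed)              # numpy RNG
    torch.manual_seed(seed)           # pytorch CPU RNG
    torch.cuda.manual_seed_all(seed)  # pytorch CUDA RNGs (safe to call without a GPU)
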
File renamed without changes.
File renamed without changes.
61 changes: 36 additions & 25 deletions experiments/transformer.test.yml
@@ -1,12 +1,11 @@
# this is a mini model for testing. See transformer.base or transformer.big
model_args:
ff_size: 256
hid_size: 128
n_heads: 2
ff_size: 512
hid_size: 256
n_heads: 4
attn_dropout: 0.1
dropout: 0.2
enc_layers: 1
dec_layers: 2
enc_layers: 3
dec_layers: 3
src_vocab: 8000
tgt_vocab: 8000
tied_emb: three-way
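
Note: tied_emb: three-way means the source embedding, target embedding, and output projection share one weight matrix (sharing src and tgt embeddings presupposes shared_vocab: true). A minimal PyTorch sketch of the idea, not RTG's module:

import torch.nn as nn

vocab, hid = 8000, 256
src_emb = nn.Embedding(vocab, hid)
tgt_emb = nn.Embedding(vocab, hid)
generator = nn.Linear(hid, vocab, bias=False)   # projects decoder states to vocab logits
tgt_emb.weight = src_emb.weight     # two-way tying: share src and tgt embeddings
generator.weight = src_emb.weight   # three-way: also tie the output projection
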
@@ -25,11 +24,11 @@ schedule:
name: noam
args:
constant: 2
warmup: 8000
model_dim: 128
warmup: 4000
model_dim: 256

criterion:
name: smooth_kld
name: smooth_kld #options "cross_entropy", "smooth_kld", "binary_cross_entropy", "triplet_loss"
args:
label_smoothing: 0.1

@@ -39,31 +38,43 @@ prep:
shared_vocab: true
src_len: 128
tgt_len: 128
train_src: experiments/sample-data/sampl.test.fr.tok
train_tgt: experiments/sample-data/sampl.test.en.tok
truncate: false
valid_src: experiments/sample-data/sampl.valid.fr.tok
valid_tgt: experiments/sample-data/sampl.valid.en.tok
train_src: data/train.src
train_tgt: data/train.tgt
truncate: true
valid_src: data/valid.src
valid_tgt: data/valid.tgt
mono_src: []
mono_tgt: []
tester:
decoder:
beam_size: 4
batch_size: 12000 # this is for 1 beam; effective_batch_size = batch_size / beam_size
lp_alpha: 0.0 # length penalty
tune:
trials: 6
tune_src: data/valid.src
tune_ref: data/valid.tgt
beam_size: [1, 4, 8]
ensemble: [1, 5, 10]
lp_alpha: [0.0, 0.6]
suggested:
- 1, 1, 0.0
- 4, 1, 0.0
- 4, 1, 0.6
- 1, 5, 0.0
- 4, 5, 0.0
- 4, 5, 0.6
- 1, 10, 0.0
- 4, 10, 0.0
- 4, 10, 0.6
suit:
valid:
- experiments/sample-data/sampl.valid.fr.tok
- experiments/sample-data/sampl.valid.en # reference, unmodified -- not tokenized
test:
- experiments/sample-data/sampl.test.fr.tok
- experiments/sample-data/sampl.test.en # reference, unmodified -- not tokenized
- data/valid.src
- data/valid.tgt
trainer:
init_args:
chunk_size: 10
batch_size: 512
check_point: 200
grad_accum: 1 # How many batches to accumulate gradients over
batch_size: 1024
check_point: 400
keep_models: 10
steps: 1000
steps: 2000
updated_at: '2019-03-09T21:15:33.707183'
seed: 12345
File renamed without changes.
File renamed without changes.
File renamed without changes.
20 changes: 6 additions & 14 deletions tests/test_tfmcls.py
@@ -2,21 +2,16 @@
#
# Author: Thamme Gowda [tg (at) isi (dot) edu]
# Created: 6/15/21
from rtg.emb import tfmcls
from rtg.registry import log
import subprocess
import random
from pathlib import Path

from torchtext.datasets import DBpedia
from pathlib import Path
import random

import rtg
from rtg.registry import registry, MODEL
import pytest
from rtg.pipeline import Pipeline, Experiment
import tempfile
from rtg.exp import load_conf
import torch
import shutil
from rtg.pipeline import Pipeline
from rtg.registry import log
from rtg.registry import registry, MODEL
from . import sanity_check_experiment


@@ -72,6 +67,3 @@ def test_tfmcls_model():
sanity_check_experiment(exp, samples=False, shared_vocab=False)
print(f"Cleaning up {tmp_dir}")
# shutil.rmtree(tmp_dir, ignore_errors=True)


