merge examples and experiments
thammegowda committed Aug 17, 2021
1 parent 4c71d1b commit 6657b36
Showing 15 changed files with 73 additions and 218 deletions.
83 changes: 0 additions & 83 deletions examples/transformer.base.yml

This file was deleted.

80 changes: 0 additions & 80 deletions examples/transformer.test.yml

This file was deleted.

File renamed without changes.
File renamed without changes.
47 changes: 31 additions & 16 deletions experiments/transformer.base.yml
@@ -17,7 +17,7 @@ optimizer:
- 0.9
- 0.98
eps: 1.0e-09
lr: 0.2
lr: 0.1

schedule:
name: noam
@@ -27,7 +27,7 @@ schedule:
model_dim: 512
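
Note: the noam schedule named above is the inverse-square-root warmup schedule from the Transformer paper. A minimal Python sketch of the formula follows; the constant, warmup, and model_dim defaults are illustrative, not necessarily what this config resolves to.

def noam_lr(step: int, constant: float = 2.0, warmup: int = 8000, model_dim: int = 512) -> float:
    # lr = constant * model_dim^-0.5 * min(step^-0.5, step * warmup^-1.5)
    step = max(step, 1)  # guard against step 0
    return constant * model_dim ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)

# the rate rises linearly until step == warmup, then decays as step^-0.5
print(round(noam_lr(8000), 6))  # peak value, ~0.000988 with these illustrative defaults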

criterion:
name: smooth_kld
name: smooth_kld #options "cross_entropy", "smooth_kld", "binary_cross_entropy", "triplet_loss"
args:
label_smoothing: 0.1
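
Note: smooth_kld is KL divergence against a label-smoothed target distribution. The sketch below illustrates the idea only; it is not RTG's criterion class, and real implementations typically also exclude the padding index.

import torch
import torch.nn.functional as F

def smooth_kld_loss(logits: torch.Tensor, targets: torch.Tensor, smoothing: float = 0.1) -> torch.Tensor:
    # logits: (batch, vocab); targets: (batch,) gold token ids
    vocab = logits.size(-1)
    smooth = torch.full_like(logits, smoothing / (vocab - 1))   # spread mass over non-gold tokens
    smooth.scatter_(1, targets.unsqueeze(1), 1.0 - smoothing)   # keep most mass on the gold token
    return F.kl_div(F.log_softmax(logits, dim=-1), smooth, reduction='batchmean')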

@@ -37,32 +37,47 @@ prep: # data preparation
shared_vocab: true # true means same vocab for src and tgt, false means different vocabs
src_len: 256 # max source length; longer sentences are handled as per 'truncate={true,false}'
tgt_len: 256
train_src: data/train.src # training data
train_tgt: data/train.tgt
truncate: true # what to do with longer sentences: if true, truncate at src_len or tgt_len; if false, filter them out
train_src: experiments/sample-data/sampl.test.fr.tok
train_tgt: experiments/sample-data/sampl.test.en.tok
valid_src: experiments/sample-data/sampl.valid.fr.tok
valid_tgt: experiments/sample-data/sampl.valid.en.tok
valid_src: data/valid.src
valid_tgt: data/valid.tgt
mono_src: [] # monolingual data for learning vocab or BPE
mono_tgt: []
tester:
decoder:
beam_size: 4
batch_size: 12000 # this is for 1 beam; effective_batch_size = batch_size / beam_size
lp_alpha: 0.0 # length penalty
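
Note: per the comment above, the decoder's token budget is divided by the beam width, and lp_alpha scales a length penalty. A quick sketch; the GNMT-style penalty shown is an assumption about the formula, not taken from RTG's code.

batch_size, beam_size = 12000, 4
effective_batch_size = batch_size // beam_size   # 3000 tokens per decode step at beam_size=4

def length_penalty(length: int, alpha: float = 0.6) -> float:
    # GNMT-style penalty; alpha=0.0 makes the penalty 1.0 regardless of length
    return ((5 + length) / 6) ** alpha
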
tune: # If this block is missing, then tuner will not be run, and some default values are picked from the code
trials: 6 # number of random trials, in addition to "suggested" values
tune_src: data/valid.src # dataset for tuning
tune_ref: data/valid.tgt
beam_size: [1, 4, 8] # pool of values for beam_size
ensemble: [1, 5, 10]
lp_alpha: [0.0, 0.4, 0.6, 1.0]
suggested: # list of suggested values for beam_size, ensemble, lp_alpha
- 1, 1, 0.0
- 4, 1, 0.0
- 4, 1, 0.6
- 1, 5, 0.0
- 4, 5, 0.0
- 4, 5, 0.6
- 1, 10, 0.0
- 4, 10, 0.0
- 4, 10, 0.6
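
Note: each suggested entry is a "beam_size, ensemble, lp_alpha" triple. A hypothetical helper (not RTG code) showing how such a string maps to typed values:

def parse_suggested(entry: str):
    beam, ensemble, lp_alpha = (v.strip() for v in entry.split(','))
    return int(beam), int(ensemble), float(lp_alpha)

print(parse_suggested('4, 5, 0.6'))   # (4, 5, 0.6)
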
suit: # suite of tests to run after training
# name of test and list of src.tok, ref files (ref should be unmodified)
valid:
- experiments/sample-data/sampl.valid.fr.tok
- experiments/sample-data/sampl.valid.en # reference, unmodified -- not tokenized
test:
- experiments/sample-data/sampl.test.fr.tok
- experiments/sample-data/sampl.test.en # reference, unmodified -- not tokenized
valid: # name of test and list of src.tok, ref files (ref should be unmodified)
- data/valid.src
- data/valid.tgt
# in case we want to use an external detokenizer; interface: $detokenizer < $out > out.detok
# by default it uses the moses-tokenizer python wrapper around the perl script
# detokenizer: cut -f1 | python -m rtg.tool.unicode_fix -l hi -d | perl scripts/indic-tok.perl -d
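
Note: a hypothetical helper illustrating the detokenizer interface stated above ($detokenizer < $out > out.detok); the function name and file handling are assumptions, not RTG's implementation.

import subprocess
from pathlib import Path

def detokenize(detok_cmd: str, hyp_file: Path) -> Path:
    out_file = hyp_file.parent / (hyp_file.name + '.detok')
    with open(hyp_file) as inp, open(out_file, 'w') as out:
        # run the configured shell pipeline: $detokenizer < $out > out.detok
        subprocess.run(detok_cmd, shell=True, stdin=inp, stdout=out, check=True)
    return out_file
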
trainer:
init_args:
chunk_size: 10 # generation in chunks of time steps to reduce memory consumption
grad_accum: 1 # How many batches to accumulate gradients over
batch_size: 4200 # not exceeding this many tokens (including padding); in tensor2tensor this is the mean batch size
check_point: 1000 # how often to checkpoint?
keep_models: 10 # how many checkpoints to keep on disk (small enough to save disk, large enough for checkpoint averaging)
steps: 200000 # how many steps to train
keep_in_mem: True
updated_at: '2019-03-09T21:15:33.707183'
seed: 12345 # fix the manual seed of pytorch + cuda + numpy + python_stdlib RNGs. Remove/comment this to disable
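
Note: the seed line above pins four RNGs. A minimal sketch of doing the same by hand (illustrative, not RTG's code):

import random
import numpy as np
import torch

def fix_seed(seed: int = 12345):
    random.seed(seed)                 # python stdlib RNG
    np.random.seed(seed)              # numpy RNG
    torch.manual_seed(seed)           # pytorch CPU RNG
    torch.cuda.manual_seed_all(seed)  # pytorch CUDA RNGs (safe to call without a GPU)
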
File renamed without changes.
File renamed without changes.
61 changes: 36 additions & 25 deletions experiments/transformer.test.yml
@@ -1,12 +1,11 @@
# this is a mini model for testing. See transformer.base or transformer.big
model_args:
ff_size: 256
hid_size: 128
n_heads: 2
ff_size: 512
hid_size: 256
n_heads: 4
attn_dropout: 0.1
dropout: 0.2
enc_layers: 1
dec_layers: 2
enc_layers: 3
dec_layers: 3
src_vocab: 8000
tgt_vocab: 8000
tied_emb: three-way
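
Note: tied_emb: three-way means the source embedding, target embedding, and output projection share one weight matrix (sharing src and tgt embeddings presupposes shared_vocab: true). A minimal PyTorch sketch of the idea, not RTG's module:

import torch.nn as nn

vocab, hid = 8000, 256
src_emb = nn.Embedding(vocab, hid)
tgt_emb = nn.Embedding(vocab, hid)
generator = nn.Linear(hid, vocab, bias=False)   # projects decoder states to vocab logits
tgt_emb.weight = src_emb.weight     # two-way tying: share src and tgt embeddings
generator.weight = src_emb.weight   # three-way: also tie the output projection
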
@@ -25,11 +24,11 @@ schedule:
name: noam
args:
constant: 2
warmup: 8000
model_dim: 128
warmup: 4000
model_dim: 256

criterion:
name: smooth_kld
name: smooth_kld #options "cross_entropy", "smooth_kld", "binary_cross_entropy", "triplet_loss"
args:
label_smoothing: 0.1

@@ -39,31 +38,43 @@ prep:
shared_vocab: true
src_len: 128
tgt_len: 128
train_src: experiments/sample-data/sampl.test.fr.tok
train_tgt: experiments/sample-data/sampl.test.en.tok
truncate: false
valid_src: experiments/sample-data/sampl.valid.fr.tok
valid_tgt: experiments/sample-data/sampl.valid.en.tok
train_src: data/train.src
train_tgt: data/train.tgt
truncate: true
valid_src: data/valid.src
valid_tgt: data/valid.tgt
mono_src: []
mono_tgt: []
tester:
decoder:
beam_size: 4
batch_size: 12000 # this is for 1 beam; effective_batch_size = batch_size / beam_size
lp_alpha: 0.0 # length penalty
tune:
trials: 6
tune_src: data/valid.src
tune_ref: data/valid.tgt
beam_size: [1, 4, 8]
ensemble: [1, 5, 10]
lp_alpha: [0.0, 0.6]
suggested:
- 1, 1, 0.0
- 4, 1, 0.0
- 4, 1, 0.6
- 1, 5, 0.0
- 4, 5, 0.0
- 4, 5, 0.6
- 1, 10, 0.0
- 4, 10, 0.0
- 4, 10, 0.6
suit:
valid:
- experiments/sample-data/sampl.valid.fr.tok
- experiments/sample-data/sampl.valid.en # reference, unmodified -- not tokenized
test:
- experiments/sample-data/sampl.test.fr.tok
- experiments/sample-data/sampl.test.en # reference, unmodified -- not tokenized
- data/valid.src
- data/valid.tgt
trainer:
init_args:
chunk_size: 10
batch_size: 512
check_point: 200
grad_accum: 1 # How many batches to accumulate gradients over
batch_size: 1024
check_point: 400
keep_models: 10
steps: 1000
steps: 2000
updated_at: '2019-03-09T21:15:33.707183'
seed: 12345
File renamed without changes.
File renamed without changes.
File renamed without changes.
20 changes: 6 additions & 14 deletions tests/test_tfmcls.py
@@ -2,21 +2,16 @@
#
# Author: Thamme Gowda [tg (at) isi (dot) edu]
# Created: 6/15/21
from rtg.emb import tfmcls
from rtg.registry import log
import subprocess
import random
from pathlib import Path

from torchtext.datasets import DBpedia
from pathlib import Path
import random

import rtg
from rtg.registry import registry, MODEL
import pytest
from rtg.pipeline import Pipeline, Experiment
import tempfile
from rtg.exp import load_conf
import torch
import shutil
from rtg.pipeline import Pipeline
from rtg.registry import log
from rtg.registry import registry, MODEL
from . import sanity_check_experiment


@@ -72,6 +67,3 @@ def test_tfmcls_model():
sanity_check_experiment(exp, samples=False, shared_vocab=False)
print(f"Cleaning up {tmp_dir}")
# shutil.rmtree(tmp_dir, ignore_errors=True)


