Commit 66ddc55

Adds script to generate speech samples
1 parent 44cfacd commit 66ddc55

4 files changed: +193 -40 lines changed

README.md

+8
@@ -6,6 +6,8 @@ of the paper
 
 > R. Scheibler and N. Ono, [*"Fast Independent Vector Extraction by Iterative SINR Maximization,"*](http://arxiv.org/abs/1910.10654) 2019.
 
+Speech samples are available [here](http://robinscheibler.org/icassp2020).
+
 Abstract
 --------
 
@@ -47,6 +49,11 @@ An `environment.yml` file is provided to install the required dependencies.
     # switch to new environment
     conda activate 2019_scheibler_five
 
+Listen
+------
+
+Samples are available [here](http://robinscheibler.org/icassp2020).
+
 Test FIVE
 ---------
 
@@ -226,6 +233,7 @@ Summary of the Files in this Repo
     paper_sim_config.json # simulation configuration file
     paper_plot_figures.py # plots the figures from the paper
     paper_plot_everything.py # plots all the output of paper_simulation.py
+    make_separation_samples.py # create sample separated signals
 
     data # directory containing simulation results
     rrtools # tools for parallel simulation

example.py

+66 -38
@@ -22,30 +22,32 @@
 
 This script requires the `mir_eval` to run, and `tkinter` and `sounddevice` packages for the GUI option.
 """
+import os
 import sys
 import time
+from pathlib import Path
 
 import matplotlib
 import numpy as np
+from mir_eval.separation import bss_eval_sources
+from scipy.io import wavfile
+
 from auxiva_pca import auxiva_pca
 from five import five
-
 # Get the data if needed
 from get_data import get_data, samples_dir
 from ive import ogive
-from mir_eval.separation import bss_eval_sources
 from overiva import overiva
 from pyroomacoustics.bss import projection_back
-from routines import PlaySoundGUI, grid_layout, random_layout, semi_circle_layout
-from scipy.io import wavfile
-
-get_data()
+from routines import (PlaySoundGUI, grid_layout, random_layout,
+                      semi_circle_layout)
 from samples.generate_samples import sampling, wav_read_center
 
 # Once we are sure the data is there, import some methods
 # to select and read samples
 sys.path.append(samples_dir)
 
+SEP_SAMPLES_DIR = Path("separation_samples")
 
 # We concatenate a few samples to make them long enough
 if __name__ == "__main__":
@@ -96,6 +98,15 @@
         action="store_true",
         help="Creates a small GUI for easy playback of the sound samples",
     )
+    parser.add_argument(
+        "--no_plot", action="store_true", help="Do not plot anything",
+    )
+    parser.add_argument(
+        "--sinr", default=5, type=int, help="Signal-to-Interference-and-Noise Ratio",
+    )
+    parser.add_argument(
+        "--seed", default=7284023459, type=int, help="Seed for the simulation",
+    )
     parser.add_argument(
         "--save",
         action="store_true",
@@ -127,13 +138,13 @@
     use_real_R = False
 
     # fix the randomness for repeatability
-    np.random.seed(30)
+    np.random.seed(args.seed)
 
     # set the source powers, the first one is half
     source_std = np.ones(n_sources_target)
     source_std[0] /= np.sqrt(2.0)
 
-    SINR = 5  # signal-to-interference-and-noise ratio
+    SINR = args.sinr  # signal-to-interference-and-noise ratio
     SINR_diffuse_ratio = 0.9999  # ratio of uncorrelated to diffuse noise
 
     # STFT parameters
@@ -161,13 +172,17 @@
     )
     # interferer_locs = grid_layout([3., 5.5], n_sources - n_sources_target, offset=[6.5, 1., 1.7])
     interferer_locs = random_layout(
-        [3.0, 5.5, 1.5], n_sources - n_sources_target, offset=[6.5, 1.0, 0.5], seed=1234
+        [3.0, 5.5, 1.5], n_sources - n_sources_target, offset=[6.5, 1.0, 0.5],
     )
     source_locs = np.concatenate((target_locs, interferer_locs), axis=1)
 
     # Prepare the signals
     wav_files = sampling(
-        1, n_sources, f"{samples_dir}/metadata.json", gender_balanced=True, seed=2222
+        1,
+        n_sources,
+        f"{samples_dir}/metadata.json",
+        gender_balanced=True,
+        seed=args.seed,
     )[0]
     signals = wav_read_center(wav_files, seed=123)
 
@@ -237,7 +252,7 @@ def callback_mix(
 
     # reference is taken at microphone 0
     ref = np.vstack(
-        [separate_recordings[0, :1], np.sum(separate_recordings[1:, :1], axis=0)]
+        [separate_recordings[0, :1], np.sum(separate_recordings[1:, :1], axis=0)]
     )
 
     SDR, SIR, eval_time = [], [], []
@@ -272,7 +287,9 @@ def convergence_callback(Y, **kwargs):
         eval_time.append(t_exit - t_enter)
 
     if args.algo.startswith("ogive"):
-        callback_checkpoints = list(range(1, ogive_iter + ogive_iter // n_iter, ogive_iter // n_iter))
+        callback_checkpoints = list(
+            range(1, ogive_iter + ogive_iter // n_iter, ogive_iter // n_iter)
+        )
     else:
         callback_checkpoints = list(range(1, n_iter + 1))
     if args.no_cb:
@@ -386,44 +403,55 @@ def convergence_callback(Y, **kwargs):
     print(f"SDR: In: {SDR[0, 0]:6.2f} dB -> Out: {SDR[-1, 0]:6.2f} dB")
     print(f"SIR: In: {SIR[0, 0]:6.2f} dB -> Out: {SIR[-1, 0]:6.2f} dB")
 
-    import matplotlib.pyplot as plt
+    if not args.no_plot:
+        import matplotlib.pyplot as plt
 
-    plt.figure()
+        plt.figure()
 
-    plt.subplot(2, 1, 1)
-    plt.specgram(mics_signals[0], NFFT=1024, Fs=room.fs)
-    plt.title("Microphone 0 input")
+        plt.subplot(2, 1, 1)
+        plt.specgram(mics_signals[0], NFFT=1024, Fs=room.fs)
+        plt.title("Microphone 0 input")
 
-    plt.subplot(2, 1, 2)
-    plt.specgram(y_hat[:, 0], NFFT=1024, Fs=room.fs)
-    plt.title("Extracted source")
+        plt.subplot(2, 1, 2)
+        plt.specgram(y_hat[:, 0], NFFT=1024, Fs=room.fs)
+        plt.title("Extracted source")
 
-    plt.tight_layout(pad=0.5)
+        plt.tight_layout(pad=0.5)
 
-    plt.figure()
-    plt.plot([0] + callback_checkpoints, SDR[:, 0], label="SDR", marker="*")
-    plt.plot([0] + callback_checkpoints, SIR[:, 0], label="SIR", marker="o")
-    plt.legend()
-    plt.tight_layout(pad=0.5)
+        if not args.no_cb:
+            plt.figure()
+            plt.plot([0] + callback_checkpoints, SDR[:, 0], label="SDR", marker="*")
+            plt.plot([0] + callback_checkpoints, SIR[:, 0], label="SIR", marker="o")
+            plt.legend()
+            plt.tight_layout(pad=0.5)
 
-    if not args.gui:
-        plt.show()
-    else:
-        plt.show(block=False)
+        if not args.gui:
+            plt.show()
+        else:
+            plt.show(block=False)
 
     if args.save:
-        wavfile.write(
-            "bss_iva_mix.wav",
-            room.fs,
-            pra.normalize(mics_signals[0, :], bits=16).astype(np.int16),
+
+        scale = (0.95 * (2 ** 15)) / np.max(
+            [np.abs(s).max() for s in [mics_signals[0, :], ref, y_hat]]
         )
-        for i, sig in enumerate(y_hat):
+
+        if not SEP_SAMPLES_DIR.exists():
+            os.mkdir(SEP_SAMPLES_DIR)
+
+        def wavsave(type_, fs, audio):
             wavfile.write(
-                "bss_iva_source{}.wav".format(i + 1),
-                room.fs,
-                pra.normalize(sig, bits=16).astype(np.int16),
+                SEP_SAMPLES_DIR
+                / f"sample_{SINR}_{args.seed}_{args.algo}_{args.dist}_{n_mics}_{type_}.wav",
+                fs,
+                (scale * audio).astype(np.int16),
             )
 
+        wavsave("mix", room.fs, mics_signals[0])
+        wavsave("ref", room.fs, ref[0])
+        for i, sig in enumerate(y_hat.T):
+            wavsave(f"source{i}", room.fs, sig)
+
     if args.gui:
 
         from tkinter import Tk
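
Note on the saving code above: rather than normalizing each file independently with pra.normalize, the mixture, the reference, and the separated outputs are now scaled by one common factor before the cast to int16, so their relative levels are preserved across the exported wav files. A minimal standalone sketch of that peak normalization, with an illustrative helper name (normalize_to_int16 is not part of the commit):

    import numpy as np

    def normalize_to_int16(signals):
        # One shared scale so the loudest sample over all signals peaks at
        # 95% of the int16 range; relative loudness between files is kept.
        peak = max(np.abs(np.asarray(s)).max() for s in signals)
        scale = 0.95 * (2 ** 15) / peak
        return [(scale * np.asarray(s)).astype(np.int16) for s in signals]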

get_data.py

+9 -2
@@ -21,6 +21,7 @@
 This script can be used to download the data used in the experiments.
 """
 import os
+
 from pyroomacoustics.datasets.utils import download_uncompress
 
 url_data = "https://zenodo.org/record/3066489/files/cmu_arctic_concat15.tar.gz"
@@ -30,7 +31,11 @@
 
 def get_data():
     if os.path.exists(samples_dir):
-        print("The samples directory " f"{samples_dir}" " seems to exist already. Delete if re-download is needed.")
+        print(
+            "The samples directory "
+            f"{samples_dir}"
+            " seems to exist already. Delete if re-download is needed."
+        )
     else:
         print("Downloading the samples... ", end="")
         download_uncompress(url_data, temp_dir)
@@ -41,5 +46,7 @@ def get_data():
         print("done.")
 
 
+get_data()
+
 if __name__ == "__main__":
-    get_data()
+    pass
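
A side effect of this change: get_data() now runs when the module is imported, so the download check happens as soon as another script imports it; example.py relies on this after dropping its explicit get_data() call (see its import hunk above). A minimal sketch of the resulting usage, using only names that already appear in this commit:

    import sys

    # Importing the module is now enough to make sure the samples are present.
    from get_data import get_data, samples_dir  # triggers the download check at import time

    sys.path.append(samples_dir)  # as done in example.py before reading the samples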

make_separation_samples.py

+110
@@ -0,0 +1,110 @@
+import os
+import subprocess
+from pathlib import Path
+
+SEP_SAMPLES_DIR = Path("separation_samples")
+
+N_ITER = {
+    "five": 3,
+    "overiva": 10,
+    "auxiva": 50,
+}
+
+ALGO_NAMES = {
+    "five": "FIVE",
+    "overiva": "OverIVA",
+    "auxiva": "AuxIVA",
+}
+
+if __name__ == "__main__":
+
+    if not SEP_SAMPLES_DIR.exists():
+        os.mkdir(SEP_SAMPLES_DIR)
+
+    f = open(SEP_SAMPLES_DIR / "table.html", "w")
+
+    print(
+        """<table>
+<tr>
+  <td># mics</td>
+  <td>sample #</td>
+  <td>algo.</td>
+  <td>clean</td>
+  <td>mix</td>
+  <td>output</td>
+  <td>SDR</td>
+  <td>SIR</td>
+  <td>iter.</td>
+  <td>runtime</td>
+</tr>""",
+        file=f,
+    )
+
+    for sinr in [5]:
+        for dist in ["gauss"]:
+            for n_mics in [2, 3, 5, 8]:
+                for i_seed, seed in enumerate(["2785643", "398745627", "58984517"]):
+                    for algo in ["five", "overiva", "auxiva"]:
+
+                        print(f"sinr={sinr} mics={n_mics} seed={seed} algo={algo}")
+
+                        command = [
+                            "python",
+                            "./example.py",
+                            "-m",
+                            str(n_mics),
+                            "-a",
+                            algo,
+                            "-d",
+                            "gauss",
+                            "-n",
+                            str(N_ITER[algo]),
+                            "--seed",
+                            str(seed),
+                            "--save",
+                            "--no_cb",
+                            "--no_plot",
+                        ]
+
+                        out = subprocess.run(command, capture_output=True)
+
+                        if out.returncode != 0:
+                            print("Failed!!")
+                            print("stderr:")
+                            print(out.stderr)
+                            print("stdout:")
+                            print(out.stdout)
+
+                        else:
+                            lines = out.stdout.decode().split("\n")
+
+                            for l in lines:
+                                e = l.split()
+                                if len(e) == 0:
+                                    continue
+                                elif l.startswith("Processing"):
+                                    runtime = e[2]
+                                elif l.startswith("SDR"):
+                                    sdr = e[6]
+                                elif l.startswith("SIR"):
+                                    sir = e[6]
+
+                            print(
+                                f""" <tr>
+  <td>{n_mics}</td>
+  <td>{i_seed + 1}</td>
+  <td>{ALGO_NAMES[algo]}</td>
+  <td><audio controls="controls" type="audio/wav" src="<SEPDIR>/sample_{sinr}_{seed}_{algo}_{dist}_{n_mics}_ref.wav"><a>play</a></audio></td>
+  <td><audio controls="controls" type="audio/wav" src="<SEPDIR>/sample_{sinr}_{seed}_{algo}_{dist}_{n_mics}_mix.wav"><a>play</a></audio></td>
+  <td><audio controls="controls" type="audio/wav" src="<SEPDIR>/sample_{sinr}_{seed}_{algo}_{dist}_{n_mics}_source0.wav"><a>play</a></audio></td>
+  <td>{sdr} dB</td>
+  <td>{sir} dB</td>
+  <td>{N_ITER[algo]}</td>
+  <td>{runtime} s</td>
+</tr>""",
+                                file=f,
+                            )
+
+    print("</table>", file=f)
+
+    f.close()
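
When run from the repository root as python ./make_separation_samples.py, the script fills separation_samples/ with the wav files written by example.py --save and a table.html that links to them. For reference, a minimal sketch of the filename convention shared by the wavsave helper in example.py and the <audio> tags above (sample_path is an illustrative helper, not part of the commit):

    from pathlib import Path

    SEP_SAMPLES_DIR = Path("separation_samples")

    def sample_path(sinr, seed, algo, dist, n_mics, type_):
        # type_ is "mix", "ref", or "source{i}", matching the calls to wavsave,
        # e.g. separation_samples/sample_5_2785643_five_gauss_2_mix.wav
        return SEP_SAMPLES_DIR / f"sample_{sinr}_{seed}_{algo}_{dist}_{n_mics}_{type_}.wav"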
