Support training your own vocoder

babysor · babysor · commit 5950eea89561 · 2021-08-29T15:43:54.000+08:00
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,28 @@
+{
+    // 使用 IntelliSense 了解相关属性。 
+    // 悬停以查看现有属性的描述。
+    // 欲了解更多信息，请访问: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Vocoder Preprocess",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_preprocess.py",
+            "console": "integratedTerminal",
+            "args": [
+                "..\\..\\chs1"
+            ],
+        },
+        {
+            "name": "Python: Vocoder Train",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_train.py",
+            "console": "integratedTerminal",
+            "args": [
+                "dev", "..\\..\\chs1"
+            ],
+        }
+    ]
+}
diff --git a/synthesizer/hparams.py b/synthesizer/hparams.py
@@ -87,6 +87,3 @@ def parse(self, string):
         silence_min_duration_split = 0.4,           # Duration in seconds of a silence for an utterance to be split
         utterance_min_duration = 1.6,               # Duration in seconds below which utterances are discarded
         )
-
-def hparams_debug_string():
-    return str(hparams)
diff --git a/synthesizer/synthesize.py b/synthesizer/synthesize.py
@@ -1,20 +1,20 @@
 import torch
 from torch.utils.data import DataLoader
-from synthesizer.hparams import hparams_debug_string
 from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
 from synthesizer.models.tacotron import Tacotron
 from synthesizer.utils.text import text_to_sequence
 from synthesizer.utils.symbols import symbols
 import numpy as np
 from pathlib import Path
 from tqdm import tqdm
+import sys
 
 
 def run_synthesis(in_dir, out_dir, model_dir, hparams):
     # This generates ground truth-aligned mels for vocoder training
     synth_dir = Path(out_dir).joinpath("mels_gta")
-    synth_dir.mkdir(exist_ok=True)
-    print(hparams_debug_string(hparams))
+    synth_dir.mkdir(parents=True, exist_ok=True)
+    print(str(hparams))
 
     # Check for GPU
     if torch.cuda.is_available():
@@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
     metadata_fpath = in_dir.joinpath("train.txt")
     mel_dir = in_dir.joinpath("mels")
     embed_dir = in_dir.joinpath("embeds")
-
+    num_workers = 0 if sys.platform.startswith("win") else 2;
     dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
     data_loader = DataLoader(dataset,
-                             collate_fn=lambda batch: collate_synthesizer(batch, r),
+                             collate_fn=lambda batch: collate_synthesizer(batch),
                              batch_size=hparams.synthesis_batch_size,
-                             num_workers=2,
+                             num_workers=num_workers,
                              shuffle=False,
                              pin_memory=True)
 
@@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
 
             # Parallelize model onto GPUS using workaround due to python bug
             if device.type == "cuda" and torch.cuda.device_count() > 1:
-                _, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
+                _, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
             else:
-                _, mels_out, _ = model(texts, mels, embeds)
+                _, mels_out, _, _  = model(texts, mels, embeds)
 
             for j, k in enumerate(idx):
                 # Note: outputs mel-spectrogram files and target ones have same names, just different folders
diff --git a/vocoder_preprocess.py b/vocoder_preprocess.py
@@ -17,7 +17,7 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
         "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
         "--out_dir, this argument won't be used.")
     parser.add_argument("--model_dir", type=str, 
-                        default="synthesizer/saved_models/pretrained/", help=\
+                        default="synthesizer/saved_models/train3/", help=\
         "Path to the pretrained model directory.")
     parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
         "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
diff --git a/vocoder_train.py b/vocoder_train.py

Original file line number	Diff line number	Diff line change
`@@ -87,6 +87,3 @@ def parse(self, string):`
`87`	`87`	`        silence_min_duration_split = 0.4,           # Duration in seconds of a silence for an utterance to be split`
`88`	`88`	`        utterance_min_duration = 1.6,               # Duration in seconds below which utterances are discarded`
`89`	`89`	`        )`
`90`		`-`
`91`		`-def hparams_debug_string():`
`92`		`-    return str(hparams)`