Skip to content

Commit 5950eea

Browse files
committed
Support training your own vocoder
1 parent 630023c commit 5950eea

File tree

5 files changed

+37
-12
lines changed

5 files changed

+37
-12
lines changed

.vscode/launch.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python: Vocoder Preprocess",
9+
"type": "python",
10+
"request": "launch",
11+
"program": "vocoder_preprocess.py",
12+
"console": "integratedTerminal",
13+
"args": [
14+
"..\\..\\chs1"
15+
],
16+
},
17+
{
18+
"name": "Python: Vocoder Train",
19+
"type": "python",
20+
"request": "launch",
21+
"program": "vocoder_train.py",
22+
"console": "integratedTerminal",
23+
"args": [
24+
"dev", "..\\..\\chs1"
25+
],
26+
}
27+
]
28+
}

synthesizer/hparams.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,3 @@ def parse(self, string):
8787
silence_min_duration_split = 0.4, # Duration in seconds of a silence for an utterance to be split
8888
utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded
8989
)
90-
91-
def hparams_debug_string():
92-
return str(hparams)

synthesizer/synthesize.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
import torch
22
from torch.utils.data import DataLoader
3-
from synthesizer.hparams import hparams_debug_string
43
from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
54
from synthesizer.models.tacotron import Tacotron
65
from synthesizer.utils.text import text_to_sequence
76
from synthesizer.utils.symbols import symbols
87
import numpy as np
98
from pathlib import Path
109
from tqdm import tqdm
10+
import sys
1111

1212

1313
def run_synthesis(in_dir, out_dir, model_dir, hparams):
1414
# This generates ground-truth-aligned (GTA) mels for vocoder training
1515
synth_dir = Path(out_dir).joinpath("mels_gta")
16-
synth_dir.mkdir(exist_ok=True)
17-
print(hparams_debug_string(hparams))
16+
synth_dir.mkdir(parents=True, exist_ok=True)
17+
print(str(hparams))
1818

1919
# Check for GPU
2020
if torch.cuda.is_available():
@@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
5959
metadata_fpath = in_dir.joinpath("train.txt")
6060
mel_dir = in_dir.joinpath("mels")
6161
embed_dir = in_dir.joinpath("embeds")
62-
62+
num_workers = 0 if sys.platform.startswith("win") else 2;
6363
dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
6464
data_loader = DataLoader(dataset,
65-
collate_fn=lambda batch: collate_synthesizer(batch, r),
65+
collate_fn=lambda batch: collate_synthesizer(batch),
6666
batch_size=hparams.synthesis_batch_size,
67-
num_workers=2,
67+
num_workers=num_workers,
6868
shuffle=False,
6969
pin_memory=True)
7070

@@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
7878

7979
# Parallelize the model across GPUs, using a workaround for a Python bug
8080
if device.type == "cuda" and torch.cuda.device_count() > 1:
81-
_, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
81+
_, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
8282
else:
83-
_, mels_out, _ = model(texts, mels, embeds)
83+
_, mels_out, _, _ = model(texts, mels, embeds)
8484

8585
for j, k in enumerate(idx):
8686
# Note: the output mel-spectrogram files and the target ones have the same names, just in different folders

archived_untest_files/vocoder_preprocess.py renamed to vocoder_preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
1717
"Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
1818
"--out_dir, this argument won't be used.")
1919
parser.add_argument("--model_dir", type=str,
20-
default="synthesizer/saved_models/pretrained/", help=\
20+
default="synthesizer/saved_models/train3/", help=\
2121
"Path to the pretrained model directory.")
2222
parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
2323
"Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "

0 commit comments

Comments
 (0)