Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/AISHELL3/preprocess.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ preprocessing:
language: "zh"
audio:
sampling_rate: 22050
max_wav_value: 32768.0
max_wav_value: 32767.0
stft:
filter_length: 1024
hop_length: 256
Expand Down
2 changes: 1 addition & 1 deletion config/LJSpeech/preprocess.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ preprocessing:
language: "en"
audio:
sampling_rate: 22050
max_wav_value: 32768.0
max_wav_value: 32767.0
stft:
filter_length: 1024
hop_length: 256
Expand Down
2 changes: 1 addition & 1 deletion config/LJSpeech_paper/preprocess.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ preprocessing:
language: "en"
audio:
sampling_rate: 22050
max_wav_value: 32768.0
max_wav_value: 32767.0
stft:
filter_length: 1024
hop_length: 256
Expand Down
2 changes: 1 addition & 1 deletion config/LibriTTS/preprocess.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ preprocessing:
language: "en"
audio:
sampling_rate: 22050
max_wav_value: 32768.0
max_wav_value: 32767.0
stft:
filter_length: 1024
hop_length: 256
Expand Down
37 changes: 37 additions & 0 deletions config/Sinhala/model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
transformer:
encoder_layer: 4
encoder_head: 2
encoder_hidden: 256
decoder_layer: 6
decoder_head: 2
decoder_hidden: 256
conv_filter_size: 1024
conv_kernel_size: [9, 1]
encoder_dropout: 0.2
decoder_dropout: 0.2

variance_predictor:
filter_size: 256
kernel_size: 3
dropout: 0.5

variance_embedding:
pitch_quantization: "linear" # support 'linear' or 'log', 'log' is allowed only if the pitch values are not normalized during preprocessing
energy_quantization: "linear" # support 'linear' or 'log', 'log' is allowed only if the energy values are not normalized during preprocessing
n_bins: 256

# gst:
# use_gst: False
# conv_filters: [32, 32, 64, 64, 128, 128]
# gru_hidden: 128
# token_size: 128
# n_style_token: 10
# attn_head: 4

multi_speaker: False

max_seq_len: 12000

vocoder:
model: "HiFi-GAN" # support 'HiFi-GAN', 'MelGAN'
speaker: "universal" # support 'LJSpeech', 'universal'
30 changes: 30 additions & 0 deletions config/Sinhala/preprocess.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
dataset: "Sinhala"

path:
corpus_path: "/home/danoja/MSC/Project/implementation/Data/Sinhala"
lexicon_path: "/home/danoja/MSC/Project/implementation/DanojaDias_FastSpeech2/FastSpeech2/lexicon/sinhala-lexicon.txt"
raw_path: "./raw_data/Sinhala"
preprocessed_path: "/home/danoja/MSC/Project/implementation/DanojaDias_FastSpeech2/FastSpeech2/preprocessed_data/Sinhala"

preprocessing:
val_size: 512
text:
text_cleaners: ["sinhala_cleaners"]
language: "si"
audio:
sampling_rate: 22050
max_wav_value: 32767.0
stft:
filter_length: 1024
hop_length: 256
win_length: 1024
mel:
n_mel_channels: 80
mel_fmin: 0
mel_fmax: 8000 # please set to 8000 for HiFi-GAN vocoder, set to null for MelGAN vocoder
pitch:
feature: "frame_level" # support 'phoneme_level' or 'frame_level'
normalization: True
energy:
feature: "frame_level" # support 'phoneme_level' or 'frame_level'
normalization: True
20 changes: 20 additions & 0 deletions config/Sinhala/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
path:
ckpt_path: "./output/ckpt/Sinhala"
log_path: "./output/log/Sinhala"
result_path: "./output/result/Sinhala"
optimizer:
batch_size: 2
betas: [0.9, 0.98]
eps: 0.000000001
weight_decay: 0.0
grad_clip_thresh: 1.0
grad_acc_step: 1
warm_up_step: 4000
anneal_steps: [300000, 400000, 500000]
anneal_rate: 0.3
step:
total_step: 900000
log_step: 100
synth_step: 1000
val_step: 1000
save_step: 50000
Loading