Trainning in French

Hi,
I want to train the model in French, i use the data set from website 'common_voice'.
**I wrote commoncoive_fr.py like this:**
from concurrent.futures import ProcessPoolExecutor
from functools import partial
import numpy as np
import os
from util import audio


def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  with open(os.path.join(in_dir, 'train.tsv'), encoding='utf-8') as f:
    for line in f:
      parts = line.strip().split('\t')
      wav_path = os.path.join(in_dir,parts[1])
      text = parts[2]
      futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
      index += 1
  return [future.result() for future in tqdm(futures)]


def _process_utterance(out_dir, index, wav_path, text):
  '''Preprocesses a single utterance audio/text pair.

  This writes the mel and linear scale spectrograms to disk and returns a tuple to write
  to the train.txt file.

  Args:
    out_dir: The directory to write the spectrograms into
    index: The numeric index to use in the spectrogram filenames.
    wav_path: Path to the audio file containing the speech input
    text: The text spoken in the input audio file

  Returns:
    A (spectrogram_filename, mel_filename, n_frames, text) tuple to write to train.txt
  '''

  # Load the audio to a numpy array:
  wav = audio.load_wav(wav_path)

  # Compute the linear-scale spectrogram from the wav:
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]

  # Compute a mel-scale spectrogram from the wav:
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)

  # Write the spectrograms to disk:
  spectrogram_filename = 'commonvoice_fr-spec.npy' % index
  mel_filename = 'commonvoice_fr-mel.npy' % index
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)

  # Return a tuple describing this training example:
  return (spectrogram_filename, mel_filename, n_frames, text)


And  I modified the preprocess.py like this:
import argparse
import os
from multiprocessing import cpu_count
from tqdm import tqdm
from datasets import amy, blizzard, ljspeech, kusal, mailabs,commonvoice_fr
from datasets import mrs
from hparams import hparams, hparams_debug_string
import sys


def preprocess_blizzard(args):
  in_dir = os.path.join(args.base_dir, 'Blizzard2012')
  out_dir = os.path.join(args.base_dir, args.output)
  os.makedirs(out_dir, exist_ok=True)
  metadata = blizzard.build_from_path(
      in_dir, out_dir, args.num_workers, tqdm=tqdm)
  write_metadata(metadata, out_dir)


def preprocess_ljspeech(args):
  in_dir = os.path.join(args.base_dir, 'LJSpeech-1.1')
  out_dir = os.path.join(args.base_dir, args.output)
  os.makedirs(out_dir, exist_ok=True)
  metadata = ljspeech.build_from_path(
      in_dir, out_dir, args.num_workers, tqdm=tqdm)
  write_metadata(metadata, out_dir)

def preprocess_mrs(args):
  in_dir = args.mrs_dir
  out_dir = os.path.join(args.base_dir, args.output)
  username = args.mrs_username
  os.makedirs(out_dir, exist_ok=True)
  metadata = mrs.build_from_path(
      in_dir, out_dir, username, args.num_workers, tqdm=tqdm)
  write_metadata(metadata, out_dir)

def preprocess_amy(args):
  in_dir = os.path.join(args.base_dir, 'amy')
  out_dir = os.path.join(args.base_dir, args.output)
  os.makedirs(out_dir, exist_ok=True)
  metadata = amy.build_from_path(in_dir, out_dir, args.num_workers, tqdm=tqdm)
  write_metadata(metadata, out_dir)


def preprocess_kusal(args):
  in_dir = os.path.join(args.base_dir, 'kusal')
  out_dir = os.path.join(args.base_dir, args.output)
  os.makedirs(out_dir, exist_ok=True)
  metadata = kusal.build_from_path(
      in_dir, out_dir, args.num_workers, tqdm=tqdm)
  write_metadata(metadata, out_dir)


def preprocess_mailabs(args):
  in_dir = os.path.join(args.mailabs_books_dir)
  out_dir = os.path.join(args.base_dir, args.output)
  os.makedirs(out_dir, exist_ok=True)
  books = args.books
  metadata = mailabs.build_from_path(
      in_dir, out_dir, books, args.num_workers, tqdm)
  write_metadata(metadata, out_dir)

def preprocess_commonvoice(args):
    in_dir = os.path.join(args.base_dir,'clips')
    out_dir = os.path.join(args.base_dir,args.output)
    os.makedirs(out_dir,exist_ok=True)
    metdata = commonvoice_fr.build_from_path(in_dir,out_dir,
            args.num_workers,tqdm=tqdm)
    write_metadata(metadata,out_dir)



def write_metadata(metadata, out_dir):
  with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
    for m in metadata:
      f.write('|'.join([str(x) for x in m]) + '\n')
  frames = sum([m[2] for m in metadata])
  hours = frames * hparams.frame_shift_ms / (3600 * 1000)
  print('Wrote %d utterances, %d frames (%.2f hours)' %
        (len(metadata), frames, hours))
  print('Max input length:  %d' % max(len(m[3]) for m in metadata))
  print('Max output length: %d' % max(m[2] for m in metadata))
  with open("metadata.txt", 'w') as f:
    f.write(
        '''
          Wrote {} utterances, {} frames, {} hours\n
          Max input lengh: {} \n
          Max output length: {} \n
        '''.format(
            len(metadata), frames, hours,
            max(len(m[3]) for m in metadata), max(m[2] for m in metadata)
        )
    )


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
  parser.add_argument('--mrs_dir', required=False)
  parser.add_argument('--mrs_username', required=False)
  parser.add_argument('--output', default='training')
  parser.add_argument(
      '--dataset', required=True, choices=['amy', 'blizzard', 'ljspeech',
          'kusal', 'mailabs','mrs','commonvoice']
  )
  parser.add_argument('--mailabs_books_dir',
                      help='absolute directory to the books for the mlailabs')
  parser.add_argument(
      '--books',
      help='comma-seperated and no space name of books i.e hunter_space,pink_fairy_book,etc.',
  )
  parser.add_argument('--num_workers', type=int, default=cpu_count())
  args = parser.parse_args()

  if args.dataset == 'mailabs' and args.books is None:
    parser.error("--books required if mailabs is chosen for dataset.")

  if args.dataset == 'mailabs' and args.mailabs_books_dir is None:
    parser.error(
        "--mailabs_books_dir required if mailabs is chosen for dataset.")

  print(hparams_debug_string())

  if args.dataset == 'amy':
    preprocess_amy(args)
  elif args.dataset == 'blizzard':
    preprocess_blizzard(args)
  elif args.dataset == 'ljspeech':
    preprocess_ljspeech(args)
  elif args.dataset == 'kusal':
    preprocess_kusal(args)
  elif args.dataset == 'mailabs':
    preprocess_mailabs(args)
  elif args.dataset == 'mrs':
    preprocess_mrs(args)
  elif args.dataset == 'commonvoice':
    preprocess_commonvoice(args)


if __name__ == "__main__":
  main()


But when I preprocces the data by using the commande:
**python3 preprocess.py --dataset commonvoice** 
I got this erros:
Traceback (most recent call last):
  File "/usr/lib/python3.5/concurrent/futures/process.py", line 175, in _process_worker
    r = call_item.fn(*call_item.args, **call_item.kwargs)
  File "/root/mimic2/datasets/commonvoice_fr.py", line 64, in _process_utterance
    spectrogram_filename = 'commonvoice_fr-spec.npy' % index
TypeError: not all arguments converted during string formatting
"""


Could you please help me to solve this problem?
Thanks

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Trainning in French #48

Load the audio to a numpy array:

Compute the linear-scale spectrogram from the wav:

Compute a mel-scale spectrogram from the wav:

Write the spectrograms to disk:

Return a tuple describing this training example:

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Trainning in French #48

Description

Load the audio to a numpy array:

Compute the linear-scale spectrogram from the wav:

Compute a mel-scale spectrogram from the wav:

Write the spectrograms to disk:

Return a tuple describing this training example:

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions