diff --git a/egs/build_your_own_voice/s1/README.md b/egs/build_your_own_voice/s1/README.md
index 53fe8830..90e63cde 100644
--- a/egs/build_your_own_voice/s1/README.md
+++ b/egs/build_your_own_voice/s1/README.md
@@ -1,48 +1,63 @@
-Download Merlin
----------------
+# Build your own voice
-git clone https://github.com/CSTR-Edinburgh/merlin.git
+To build your own voice, `cd egs/build_your_own_voice/s1` and follow the steps below:
-Setup
------
+## Setting up
-To setup voice:
+The first step is to run setup as it creates directories and some text files for testing.
-./01_setup.sh give_a_voice_name
+The next step demonstrates how to set up a voice:
-Prepare Data
-------------
+```sh
+./01_setup.sh my_voice
+```
-To derive labels, use alignment scripts provided below:
-a) state_align - https://github.com/CSTR-Edinburgh/merlin/tree/master/misc/scripts/alignment/state_align
-b) phone_align - https://github.com/CSTR-Edinburgh/merlin/tree/master/misc/scripts/alignment/phone_align
+It also creates a global config file: `conf/global_settings.cfg`, where default settings are stored.
+You need to modify these params as per your own data.
-Then, chose the vocoder:
-a) STRAIGHT - extracts 60-dim MGC, 25-dim BAP, 1-dim LF0
-b) WORLD - extracts 60-dim MGC, variable-dim BAP, 1-dim LF0
- - BAP dim (1 for 16Khz, 5 for 48Khz)
-c) WORLD_v2 - extracts 60-dim MGC, 5-dim BAP, 1-dim LF0
+## Prepare labels
-To derive acousitc features, use vocoder scripts provided below:
-a) STRAIGHT - https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/straight/extract_features_for_merlin.sh
-b) WORLD - https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
-c) WORLD_v2 - https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world_v2/extract_features_for_merlin.sh
+To prepare labels:
+```sh
+./02_prepare_labels.sh
+```
-Run below script for instructions:
-./02_prepare_data.sh
+## Prepare acoustic features
+
+To prepare acoustic features:
+```sh
+./03_prepare_acoustic_features.sh
+```
-Run Merlin
-----------
+## Prepare config files
-Once after setup, use below script to create acoustic, duration models and perform final test synthesis:
+At this point, we have to prepare two config files to train DNN models
+- Acoustic Model
+- Duration Model
-./03_run_merlin.sh
+To prepare config files:
+```sh
+./04_prepare_conf_files.sh conf/global_settings.cfg
+```
+Four config files will be generated: two for training, and two for testing.
+## Train duration model
-Generate new sentences
-----------------------
+To train duration model:
+```sh
+./05_train_duration_model.sh
+```
-To generate new sentences, please follow [steps] (https://github.com/CSTR-Edinburgh/merlin/issues/28) in below script:
+## Train acoustic model
-./04_merlin_synthesis.sh
+To train acoustic model:
+```sh
+./06_train_acoustic_model.sh
+```
+## Synthesize speech
+
+To synthesize speech:
+```sh
+./07_run_merlin.sh
+```
diff --git a/egs/roger_blizzard2008/README b/egs/roger_blizzard2008/README
new file mode 100644
index 00000000..e48a992c
--- /dev/null
+++ b/egs/roger_blizzard2008/README
@@ -0,0 +1,12 @@
+About the roger blizzard2008 corpus
+
+The roger database was constructed at the Centre for Speech Technology Research at the University of Edinburgh for the Blizzard Challenge 2008. The database was only available to registered participants in the challenge (see http://www.cstr.ed.ac.uk/projects/roger_blizzard2008/).
+
+It contains 9609 utterances (~15h) from one English speaker (roger) at 16 kHz.
+
+Each subdirectory of this directory contains the
+scripts for a sequence of experiments.
+
+ s1: To run roger_demo or roger_full with WORLD vocoder
+
+
diff --git a/egs/roger_blizzard2008/s1/01_setup.sh b/egs/roger_blizzard2008/s1/01_setup.sh
new file mode 100755
index 00000000..46b6a3c4
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/01_setup.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+
+if test "$#" -ne 1; then # exactly one argument (the voice name) is required
+ echo "################################"
+ echo "Usage:"
+ echo "./01_setup.sh VOICE_NAME"
+ echo ""
+ echo "Give a voice name: roger_demo or roger_full"
+ echo " Demo uses theherald1 (281 utterances, 42.8 minutes)"
+ echo " Full uses carroll, arctic and theherald1-3 (4871 utterances, ~8h)"
+ echo "################################"
+ exit 1
+fi
+
+if [ ! -d "${ROGER_DB}" ]; then
+ echo "ERROR: Variable ROGER_DB must be set to the roger database."
+ echo " Use: export ROGER_DB=path/to/db/"
+ exit 1
+fi
+
+### Step 1: setup directories and the training data files ###
+echo "Step 1:"
+
+current_working_dir=$(pwd)
+merlin_dir=$(dirname $(dirname $(dirname $current_working_dir)))
+experiments_dir=${current_working_dir}/experiments
+data_dir=${current_working_dir}/database
+
+voice_name=$1
+voice_dir=${experiments_dir}/${voice_name}
+
+acoustic_dir=${voice_dir}/acoustic_model
+duration_dir=${voice_dir}/duration_model
+synthesis_dir=${voice_dir}/test_synthesis
+
+mkdir -p ${data_dir}
+mkdir -p ${experiments_dir}
+mkdir -p ${voice_dir}
+mkdir -p ${acoustic_dir}
+mkdir -p ${duration_dir}
+mkdir -p ${synthesis_dir}
+mkdir -p ${acoustic_dir}/data
+mkdir -p ${duration_dir}/data
+mkdir -p ${synthesis_dir}/txt
+
+
+audio_dir=database/wav
+txt_dir=database/txt
+label_dir=database/labels
+
+# Select the utterance list(s) to be used for training.
+if [[ "$voice_name" == *"demo"* ]]
+then
+ # The demo version only uses theherald1 (281 utterances, 42.8 minutes)
+ uttLists=("theherald1")
+elif [[ "$voice_name" == *"full"* ]]
+then
+ # The full version uses all utterance lists with meaningful utterances.
+ # Using: carroll, arctic, theherald1-3 (4871 utterances, ~8h).
+ uttLists=("carroll" "arctic" "theherald") # Can be any of carroll, unilex, address, spelling, arctic, emphasis, theherald, theherald1, theherald2, theherald3, all_new, total.
+else
+ echo "Undefined voice name ($voice_name)...please use roger_demo or roger_full !!"
+ exit 1
+fi
+
+# Collect utterance ids of necessary audio files.
+utts=()
+for uttList in "${uttLists[@]}"; do
+ mapfile -t -O ${#utts[@]} utts < "${ROGER_DB}/stp/${uttList}" # -t remove trailing newline, -O start index to add entries; path quoted so a ROGER_DB containing spaces still works.
+done
+# Remove duplicates.
+utts=($(printf "%s\n" "${utts[@]}" | sort -u))
+
+# Audios have to be removed because utterance list selection could have been changed.
+rm -rf $audio_dir
+# Leave this check for fast testing, when $audio_dir does not have to be removed.
+if [ ! -e $audio_dir ]; then
+ mkdir -p $audio_dir
+ roger_db_abs=$(cd "${ROGER_DB}" && pwd) # Collect necessary audio files (absolute link targets; relative ones would dangle).
+ for utt in "${utts[@]}"; do
+ # cp $ROGER_DB/wav/${utt:0:7}/${utt}.wav $audio_dir/${utt}.wav
+ ln -sf "${roger_db_abs}/wav/${utt:0:7}/${utt}.wav" $audio_dir/${utt}.wav
+ done
+fi
+
+# Labels have to be removed because utterance list selection could have been changed.
+rm -rf $txt_dir
+# Leave this check for fast testing, when $txt_dir does not have to be removed.
+if [ ! -e $txt_dir ]; then
+ mkdir -p $txt_dir
+ # The utts.data file contains all labels.
+ cp ${ROGER_DB}/utts.data ${txt_dir}/utts.data
+ # Combine the selected utterances to a regex pattern.
+ utts_pat=$(echo ${utts[@]}|tr " " "|")
+ # Select those labels of utts.data which belong to the selected utterances.
+ cat ${txt_dir}/utts.data | grep -wE "${utts_pat}" >| ${txt_dir}/utts_selected.data
+ # Turn every line of utts.data into a txt file using the utterance id as file name.
+ awk -F' ' -v outDir=${txt_dir} '{print substr($0,length($1)+2,length($0)) > outDir"/"substr($1,2,length($1)-1)".txt"}' ${txt_dir}/utts_selected.data
+ # Remove unnecessary files.
+ rm ${txt_dir}/utts.data
+ rm ${txt_dir}/utts_selected.data
+fi
+
+# Clear the labels directory.
+rm -rf $label_dir
+
+### create some test files ###
+echo "Hello world." > ${synthesis_dir}/txt/test_001.txt
+echo "Hi, this is a demo voice from Merlin." > ${synthesis_dir}/txt/test_002.txt
+echo "Hope you guys enjoy free open-source voices from Merlin." > ${synthesis_dir}/txt/test_003.txt
+printf "test_001\ntest_002\ntest_003" > ${synthesis_dir}/test_id_list.scp
+
+global_config_file=conf/global_settings.cfg
+
+### default settings ###
+echo "######################################" > $global_config_file
+echo "############# PATHS ##################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "MerlinDir=${merlin_dir}" >> $global_config_file
+echo "WorkDir=${current_working_dir}" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "############# PARAMS #################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "Voice=${voice_name}" >> $global_config_file
+echo "Labels=state_align" >> $global_config_file
+echo "QuestionFile=questions-radio_dnn_416.hed" >> $global_config_file
+echo "Vocoder=WORLD" >> $global_config_file
+echo "SamplingFreq=16000" >> $global_config_file
+echo "SilencePhone='sil'" >> $global_config_file
+echo "FileIDList=file_id_list.scp" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "######### No. of files ###############" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+# Automatically select 5% of the data for validation and test set.
+num_files=$(ls -1 $audio_dir | wc -l)
+num_dev_set=$(awk "BEGIN { pc=${num_files}*0.05; print(int(pc)) }")
+num_train_set=$(($num_files-2*$num_dev_set))
+echo "Train=$num_train_set" >> $global_config_file
+echo "Valid=$num_dev_set" >> $global_config_file
+echo "Test=$num_dev_set" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "############# TOOLS ##################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "ESTDIR=${merlin_dir}/tools/speech_tools" >> $global_config_file
+echo "FESTDIR=${merlin_dir}/tools/festival" >> $global_config_file
+echo "FESTVOXDIR=${merlin_dir}/tools/festvox" >> $global_config_file
+echo "" >> $global_config_file
+echo "HTKDIR=${merlin_dir}/tools/bin/htk" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "Merlin default voice settings configured in \"$global_config_file\""
+echo "Modify these params as per your data..."
+echo "eg., sampling frequency, no. of train files etc.,"
+echo "setup done...!"
+
diff --git a/egs/roger_blizzard2008/s1/02_prepare_labels.sh b/egs/roger_blizzard2008/s1/02_prepare_labels.sh
new file mode 100755
index 00000000..cb52f940
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/02_prepare_labels.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 3; then # wav dir, txt dir and label output dir are all required
+ echo "################################"
+ echo "Usage:"
+ echo "./02_prepare_labels.sh WAV_DIR TXT_DIR LAB_DIR"
+ echo ""
+ echo "default path to wav dir(Input): database/wav"
+ echo "default path to txt dir(Input): database/txt"
+ echo "default path to lab dir(Output): database/labels"
+ echo "################################"
+ exit 1
+fi
+
+wav_dir=$1
+inp_txt=$2
+lab_dir=$3
+
+####################################
+########## Prepare labels ##########
+####################################
+
+prepare_labels=true
+copy=true
+
+if [ "$prepare_labels" = true ]; then
+ echo "Step 2: "
+ echo "Preparing labels..."
+ if [ "$Labels" == "state_align" ]
+ then
+ ./scripts/run_state_aligner.sh $wav_dir $inp_txt $lab_dir $global_config_file
+ elif [ "$Labels" == "phone_align" ]
+ then
+ ./scripts/run_phone_aligner.sh $wav_dir $inp_txt $lab_dir $global_config_file
+ else
+ echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+ exit 1 # stop here: the copy step below would otherwise write empty file id lists and report success
+ fi
+fi
+
+if [ "$copy" = true ]; then
+ echo "Copying labels to duration and acoustic data directories..."
+
+ duration_data_dir=experiments/${Voice}/duration_model/data
+ acoustic_data_dir=experiments/${Voice}/acoustic_model/data
+
+ cp -r $lab_dir/label_$Labels $duration_data_dir
+ cp -r $lab_dir/label_$Labels $acoustic_data_dir
+
+ ls $lab_dir/label_$Labels > $duration_data_dir/$FileIDList
+ ls $lab_dir/label_$Labels > $acoustic_data_dir/$FileIDList
+
+ sed -i 's/\.lab//g' $duration_data_dir/$FileIDList
+ sed -i 's/\.lab//g' $acoustic_data_dir/$FileIDList
+
+ echo "done...!"
+fi
diff --git a/egs/roger_blizzard2008/s1/03_prepare_acoustic_features.sh b/egs/roger_blizzard2008/s1/03_prepare_acoustic_features.sh
new file mode 100755
index 00000000..f8267aaf
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/03_prepare_acoustic_features.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 2; then # wav dir (input) and feature dir (output) are both required
+ echo "################################"
+ echo "Usage:"
+ echo "./03_prepare_acoustic_features.sh WAV_DIR FEAT_DIR"
+ echo ""
+ echo "default path to wav dir(Input): database/wav"
+ echo "default path to feat dir(Output): database/feats"
+ echo "################################"
+ exit 1
+fi
+
+wav_dir=$1
+feat_dir=$2
+
+if [ ! "$(ls -A ${wav_dir})" ]; then
+ echo "Please place your audio files in: ${wav_dir}"
+ exit 1
+fi
+
+####################################
+##### prepare vocoder features #####
+####################################
+
+prepare_feats=true
+copy=true
+
+if [ "$prepare_feats" = true ]; then
+ echo "Step 3:"
+ echo "Prepare acoustic features using WORLD vocoder..."
+ python ${MerlinDir}/misc/scripts/vocoder/world/extract_features_for_merlin.py ${MerlinDir} ${wav_dir} ${feat_dir} $SamplingFreq
+fi
+
+if [ "$copy" = true ]; then
+ echo "Copying features to acoustic data directory..."
+ acoustic_data_dir=experiments/${Voice}/acoustic_model/data
+ cp -r ${feat_dir}/* $acoustic_data_dir
+ echo "done...!"
+fi
diff --git a/egs/roger_blizzard2008/s1/04_prepare_conf_files.sh b/egs/roger_blizzard2008/s1/04_prepare_conf_files.sh
new file mode 100755
index 00000000..12962042
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/04_prepare_conf_files.sh
@@ -0,0 +1,25 @@
+#!/bin/bash -e
+
+if test "$#" -ne 1; then # the global config file path is required
+ echo "################################"
+ echo "Usage:"
+ echo "./04_prepare_conf_files.sh GLOBAL_CONF_FILE"
+ echo ""
+ echo "default path to global conf file: conf/global_settings.cfg"
+ echo "Config files will be prepared based on settings in global conf file"
+ echo "################################"
+ exit 1
+fi
+
+global_config_file=$1
+
+
+### Step 4: prepare config files for acoustic, duration models and for synthesis ###
+echo "Step 4:"
+
+echo "preparing config files for acoustic, duration models..."
+./scripts/prepare_config_files.sh $global_config_file
+
+echo "preparing config files for synthesis..."
+./scripts/prepare_config_files_for_synthesis.sh $global_config_file
+
diff --git a/egs/roger_blizzard2008/s1/05_train_duration_model.sh b/egs/roger_blizzard2008/s1/05_train_duration_model.sh
new file mode 100755
index 00000000..26b7326f
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/05_train_duration_model.sh
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 1; then # the duration config file path is required
+ echo "################################"
+ echo "Usage:"
+ echo "./05_train_duration_model.sh DURATION_CONF_FILE"
+ echo ""
+ echo "Default path to duration conf file: conf/duration_${Voice}.conf"
+ echo "################################"
+ exit 1
+fi
+
+duration_conf_file=$1
+
+### Step 5: train duration model ###
+echo "Step 5:"
+echo "training duration model..."
+./scripts/${cuda_cmd} "experiments/${Voice}/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"
+
+
diff --git a/egs/roger_blizzard2008/s1/06_train_acoustic_model.sh b/egs/roger_blizzard2008/s1/06_train_acoustic_model.sh
new file mode 100755
index 00000000..e955f5a0
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/06_train_acoustic_model.sh
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 1; then # the acoustic config file path is required
+ echo "################################"
+ echo "Usage:"
+ echo "./06_train_acoustic_model.sh ACOUSTIC_CONF_FILE"
+ echo ""
+ echo "Default path to acoustic conf file: conf/acoustic_${Voice}.conf"
+ echo "################################"
+ exit 1
+fi
+
+acoustic_conf_file=$1
+
+### Step 6: train acoustic model ###
+echo "Step 6:"
+echo "training acoustic model..."
+./scripts/${cuda_cmd} "experiments/${Voice}/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"
+
+
diff --git a/egs/roger_blizzard2008/s1/07_run_merlin.sh b/egs/roger_blizzard2008/s1/07_run_merlin.sh
new file mode 100755
index 00000000..f9daea49
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/07_run_merlin.sh
@@ -0,0 +1,41 @@
+#!/bin/bash -e
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 3; then # text dir plus the two test config files are required
+ echo "################################"
+ echo "Usage: "
+ echo "./07_run_merlin.sh TEXT_DIR TEST_DUR_CONF_FILE TEST_SYNTH_CONF_FILE"
+ echo ""
+ echo "default path to text dir: experiments/${Voice}/test_synthesis/txt"
+ echo "default path to test duration conf file: conf/test_dur_synth_${Voice}.conf"
+ echo "default path to test synthesis conf file: conf/test_synth_${Voice}.conf"
+ echo "################################"
+ exit 1
+fi
+
+inp_txt=$1
+test_dur_config_file=$2
+test_synth_config_file=$3
+
+### Step 7: synthesize speech from text ###
+echo "Step 7:"
+echo "synthesizing speech from text..."
+
+echo "preparing full-contextual labels using Festival frontend..."
+lab_dir=$(dirname $inp_txt)
+./scripts/prepare_labels_from_txt.sh $inp_txt $lab_dir $global_config_file
+
+echo "synthesizing durations..."
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_dur_config_file"
+
+echo "synthesizing speech..."
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_synth_config_file"
+
+echo "deleting intermediate synthesis files..."
+./scripts/remove_intermediate_files.sh $global_config_file
+
+echo "synthesized audio files are in: experiments/${Voice}/test_synthesis/wav"
+echo "All successful!! Your demo voice is ready :)"
+
diff --git a/egs/roger_blizzard2008/s1/README.md b/egs/roger_blizzard2008/s1/README.md
new file mode 100644
index 00000000..1669dabe
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/README.md
@@ -0,0 +1,57 @@
+Download Merlin
+---------------
+
+Step 1: git clone https://github.com/CSTR-Edinburgh/merlin.git
+
+Install tools
+-------------
+
+Step 2: cd merlin/tools
+Step 3: ./compile_tools.sh
+Step 4: install festival and HTS at merlin/tools/
+ Possible help: [Issue96](https://github.com/CSTR-Edinburgh/merlin/issues/96)
+
+Setup
+-----
+
+This example uses the roger database from the Blizzard Challenge 2008.
+The database is not freely available; you can only run this example if you have access to it.
+Use
+Step 5: export ROGER_DB=/path/to/your/roger_database/
+in your console before running the example.
+
+To setup voice:
+
+Take a look at ./01_setup.sh
+You probably have to change the way the database is accessed; this depends on how your database is structured.
+Check the lines 70-95, the comments should guide you through the process.
+
+Demo voice
+----------
+
+To run demo voice, please follow below steps:
+
+Step 6: cd merlin/egs/roger_blizzard2008/s1
+Step 7: ./run_demo_voice.sh
+
+Demo voice trains only on 281 utterances (theherald1) and shouldn't take long.
+
+Full voice
+----------
+
+To run full voice, please follow below steps:
+
+Step 6: cd merlin/egs/roger_blizzard2008/s1
+Step 7: ./run_full_voice.sh
+
+Full voice utilizes carroll, arctic and theherald1-3 (4871 utterances). Training the voice takes approximately 3 to 4 hours.
+
+Generate new sentences
+----------------------
+
+To generate new sentences, please follow below steps:
+
+Step 8: Run either demo voice or full voice.
+Step 9: Place the txt files containing the utterances in experiments/roger_demo OR roger_full/test_synthesis/txt
+Step 10: ./merlin_synthesis.sh
+
diff --git a/egs/roger_blizzard2008/s1/cmd.sh b/egs/roger_blizzard2008/s1/cmd.sh
new file mode 100755
index 00000000..158e72c8
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/cmd.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# "queue.pl" uses qsub. The options to it are
+# options to qsub. If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+#a) Sun grid options (IDIAP)
+# ATTENTION: Do that in your shell: SETSHELL grid
+#export cuda_cmd="queue.pl -l gpu"
+#export cuda_short_cmd="queue.pl -l sgpu"
+#export cuda_cmd="queue.pl -l q1d,hostname=dynamix03"
+#export cuda_cmd="..."
+
+#b) BUT cluster options
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+
+#c) run it locally...
+export cuda_cmd=run.pl
+export cuda_short_cmd=$cuda_cmd
diff --git a/egs/roger_blizzard2008/s1/conf/logging_config.conf b/egs/roger_blizzard2008/s1/conf/logging_config.conf
new file mode 100644
index 00000000..d4aea5a3
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/conf/logging_config.conf
@@ -0,0 +1,129 @@
+# configuration for logging
+#
+# logging levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL
+
+
+
+[loggers]
+# list all loggers here, then configure them below
+# if you don't list a logger here, the configuration below will be ignored
+# whenever you add a new logger to the code, remember to name it here and add a new [logger_NAME] section below
+keys=root,configuration,main,main.train_DNN,plotting,labels,dur,acoustic_comp,acoustic_norm,dnn_generation,param_generation,wav_generation,subprocess,ListDataProvider
+
+# two handlers; you should never need to change this
+[handlers]
+keys=console, file
+
+# two formatters for those handlers; again, you should never need to change this
+[formatters]
+keys=console, file
+
+# this is the root logger - don't change this (other than changing the level)
+[logger_root]
+level=WARNING
+handlers=file,console
+
+# configure each logger like this:
+# [logger_NAME]
+# this logger is called 'configuration'
+[logger_configuration]
+# logging level - change this to WARNING for tested code, or to CRITICAL if you only want logging just before a crash
+level=DEBUG
+# name the handlers - this should be one or both of file,console depending where you want log messages written to
+handlers=file,console
+# name of the logger as referred to in the code
+qualname=configuration
+# whether to propagate messages to parent loggers (incl. the root logger), in general you want propagate=0
+propagate=0
+
+[logger_main]
+level=DEBUG
+handlers=file,console
+qualname=main
+propagate=0
+
+[logger_main.train_DNN]
+level=DEBUG
+handlers=file,console
+qualname=main.train_DNN
+propagate=0
+
+[logger_plotting]
+level=INFO
+qualname=plotting
+handlers=console,file
+propagate=0
+
+[logger_labels]
+level=DEBUG
+handlers=file,console
+qualname=labels
+propagate=0
+
+[logger_dur]
+level=DEBUG
+handlers=file,console
+qualname=dur
+propagate=0
+
+[logger_dnn_generation]
+level=DEBUG
+handlers=file,console
+qualname=dnn_generation
+propagate=0
+
+[logger_param_generation]
+level=DEBUG
+handlers=file,console
+qualname=param_generation
+propagate=0
+
+
+[logger_wav_generation]
+level=INFO
+handlers=file,console
+qualname=wav_generation
+propagate=0
+
+[logger_subprocess]
+level=INFO
+handlers=file,console
+qualname=subprocess
+propagate=0
+
+
+[logger_acoustic_norm]
+level=DEBUG
+handlers=file,console
+qualname=acoustic_norm
+propagate=0
+
+[logger_acoustic_comp]
+level=DEBUG
+handlers=file,console
+qualname=acoustic_comp
+propagate=0
+
+
+[logger_ListDataProvider]
+level=WARNING
+handlers=file,console
+qualname=ListDataProvider
+propagate=0
+
+# [handler_file]
+# do NOT define this here - it will be added automatically
+# because we need to know the log file name before creating this handler
+
+[handler_console]
+class=StreamHandler
+formatter=console
+args=(sys.stdout,)
+
+[formatter_file]
+class=logging.Formatter
+format=%(asctime)s %(levelname)8s%(name)15s: %(message)s
+
+[formatter_console]
+class=logplot.logging_plotting.ColouredFormatter
+format=%(asctime)s %(levelname)8s%(name)15s: %(message)s
diff --git a/egs/roger_blizzard2008/s1/merlin_synthesis.sh b/egs/roger_blizzard2008/s1/merlin_synthesis.sh
new file mode 100755
index 00000000..f8d363af
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/merlin_synthesis.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+source cmd.sh
+
+if test "$#" -ne 0; then
+ echo "Usage: ./merlin_synthesis.sh"
+ exit 1
+fi
+
+global_config_file=conf/global_settings.cfg
+
+if [ ! -f $global_config_file ]; then
+ echo "Please run steps from 1-5..."
+ exit 1
+else
+ source $global_config_file
+fi
+
+### define few variables here
+testDir=experiments/${Voice}/test_synthesis
+
+txt_dir=${testDir}/txt
+
+### Synthesize speech ###
+echo "Synthesizing speech..."
+./07_run_merlin.sh $txt_dir conf/test_dur_synth_${Voice}.conf conf/test_synth_${Voice}.conf
+#./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "conf/test_dur_synth_${Voice}.conf"
+#./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth_speech.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "conf/test_synth_${Voice}.conf"
+
+#echo "deleting intermediate synthesis files..."
+#./scripts/remove_intermediate_files.sh $global_config_file
+
+# echo "synthesized audio files are in: experiments/${Voice}/test_synthesis/wav"
+
diff --git a/egs/roger_blizzard2008/s1/run_demo_voice.sh b/egs/roger_blizzard2008/s1/run_demo_voice.sh
new file mode 100755
index 00000000..7f03edfd
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/run_demo_voice.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+
+source cmd.sh
+export cuda_cmd=$cuda_short_cmd # Demo uses few data so use short queues.
+#export ROGER_DB=/idiap/resource/database/blizzard2008/blizzard_release/
+
+if test "$#" -ne 0; then # this wrapper takes no arguments
+ echo "Usage: ./run_demo_voice.sh"
+ exit 1
+fi
+
+### Step 1: setup directories and the training data files ###
+./01_setup.sh roger_demo
+
+### Step 2: prepare festival labels ###
+./02_prepare_labels.sh database/wav database/txt database/labels
+
+### Step 3: Extract acoustic features from audio files ###
+./03_prepare_acoustic_features.sh database/wav database/feats
+
+### Step 4: prepare config files for acoustic, duration models and for synthesis ###
+./04_prepare_conf_files.sh conf/global_settings.cfg
+
+### Step 5: train duration model ###
+./05_train_duration_model.sh conf/duration_roger_demo.conf
+
+### Step 6: train acoustic model ###
+./06_train_acoustic_model.sh conf/acoustic_roger_demo.conf
+
+### Step 7: synthesize speech ###
+./07_run_merlin.sh experiments/roger_demo/test_synthesis/txt/ conf/test_dur_synth_roger_demo.conf conf/test_synth_roger_demo.conf
+
+
diff --git a/egs/roger_blizzard2008/s1/run_full_voice.sh b/egs/roger_blizzard2008/s1/run_full_voice.sh
new file mode 100755
index 00000000..1c87e4f8
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/run_full_voice.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -e
+
+source cmd.sh
+# export ROGER_DB=/idiap/resource/database/blizzard2008/blizzard_release/
+
+if test "$#" -ne 0; then
+ echo "Usage: ./run_full_voice.sh"
+ exit 1
+fi
+
+### Step 1: setup directories and the training data files ###
+./01_setup.sh roger_full
+
+### Step 2: prepare festival labels ###
+./02_prepare_labels.sh database/wav database/txt database/labels
+
+### Step 3: Extract acoustic features from audio files ###
+./03_prepare_acoustic_features.sh database/wav database/feats
+
+### Step 4: prepare config files for acoustic, duration models and for synthesis ###
+./04_prepare_conf_files.sh conf/global_settings.cfg
+
+### Step 5: train duration model ###
+./05_train_duration_model.sh conf/duration_roger_full.conf
+
+### Step 6: train acoustic model ###
+./06_train_acoustic_model.sh conf/acoustic_roger_full.conf
+
+### Step 7: synthesize speech ###
+./07_run_merlin.sh experiments/roger_full/test_synthesis/txt/ conf/test_dur_synth_roger_full.conf conf/test_synth_roger_full.conf
+
+
diff --git a/egs/roger_blizzard2008/s1/scripts/prepare_config_files.sh b/egs/roger_blizzard2008/s1/scripts/prepare_config_files.sh
new file mode 100755
index 00000000..17a87662
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/prepare_config_files.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+if test "$#" -ne 1; then
+ echo "Usage: ./scripts/prepare_config_files.sh conf/global_settings.cfg"
+ exit 1
+fi
+
+if [ ! -f $1 ]; then
+ echo "Global config file doesn't exist"
+ exit 1
+else
+ source $1
+fi
+
+SED=sed
+if [[ "$OSTYPE" == "darwin"* ]]; then
+ which gsed > /dev/null
+ if [[ "$?" != 0 ]]; then
+ echo "You need to install GNU sed with 'brew install gnu-sed' on osX"
+ exit 1
+ fi
+ SED=gsed
+fi
+
+
+#########################################
+######## duration config file ###########
+#########################################
+
+duration_config_file=conf/duration_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/duration_demo.conf $duration_config_file
+
+# ... and modify it:
+
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $duration_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $duration_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model'# $duration_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $duration_config_file
+
+# [Labels]
+
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $duration_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $duration_config_file
+$SED -i s#'label_align:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model/data/label_'${Labels}# $duration_config_file
+$SED -i s#'question_file_name:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $duration_config_file
+
+
+# [Outputs]
+if [ "$Labels" == "state_align" ]
+then
+ $SED -i s#'dur\s*:.*'#'dur: 5'# $duration_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+ $SED -i s#'dur\s*:.*'#'dur: 1'# $duration_config_file
+else
+ echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+
+# [Architecture]
+
+if [[ "$Voice" == *"demo"* ]]
+then
+ $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $duration_config_file
+ $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $duration_config_file
+fi
+
+$SED -i s#'sequential_training.*:.*'#'sequential_training: True'# $duration_config_file
+# $SED -i s#'learning_rate.*:.*'#'learning_rate: 0.002'# $duration_config_file
+$SED -i s#'training_epochs.*:.*'#'training_epochs: 50'# $duration_config_file
+
+
+# [Data]
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $duration_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $duration_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $duration_config_file
+
+echo "Duration configuration settings stored in $duration_config_file"
+
+
+
+
+#########################################
+######## acoustic config file ###########
+#########################################
+
+acoustic_config_file=conf/acoustic_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/acoustic_demo.conf $acoustic_config_file
+
+# ... and modify it:
+
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $acoustic_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $acoustic_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model'# $acoustic_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $acoustic_config_file
+
+
+# [Labels]
+
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $acoustic_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $acoustic_config_file
+$SED -i s#'label_align:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model/data/label_'${Labels}# $acoustic_config_file
+$SED -i s#'question_file_name:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $acoustic_config_file
+
+if [ "$Labels" == "state_align" ]
+then
+ $SED -i s#'subphone_feats:.*'#'subphone_feats: full'# $acoustic_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+ $SED -i s#'subphone_feats:.*'#'subphone_feats: coarse_coding'# $acoustic_config_file
+else
+ echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+
+# [Outputs]
+
+$SED -i s#'mgc\s*:.*'#'mgc: 60'# $acoustic_config_file
+$SED -i s#'dmgc\s*:.*'#'dmgc: 180'# $acoustic_config_file
+
+if [ "$Vocoder" == "STRAIGHT" ]
+then
+ $SED -i s#'bap\s*:.*'#'bap: 25'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 75'# $acoustic_config_file
+
+elif [ "$Vocoder" == "WORLD" ]
+then
+ if [ "$SamplingFreq" == "16000" ]
+ then
+ $SED -i s#'bap\s*:.*'#'bap: 1'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 3'# $acoustic_config_file
+ elif [ "$SamplingFreq" == "48000" ]
+ then
+ $SED -i s#'bap\s*:.*'#'bap: 5'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 15'# $acoustic_config_file
+ fi
+else
+ echo "This vocoder ($Vocoder) is not supported as of now...please configure yourself!!"
+fi
+
+$SED -i s#'lf0\s*:.*'#'lf0: 1'# $acoustic_config_file
+$SED -i s#'dlf0\s*:.*'#'dlf0: 3'# $acoustic_config_file
+
+
+# [Waveform]
+$SED -i s#'vocoder_type\s*:.*'#'vocoder_type: '${Vocoder}# $acoustic_config_file
+
+$SED -i s#'samplerate\s*:.*'#'samplerate: '${SamplingFreq}# $acoustic_config_file
+if [ "$SamplingFreq" == "16000" ]
+then
+ $SED -i s#'framelength\s*:.*'#'framelength: 1024'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 511'# $acoustic_config_file
+ $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.58'# $acoustic_config_file
+
+elif [ "$SamplingFreq" == "48000" ]
+then
+ if [ "$Vocoder" == "WORLD" ]
+ then
+ $SED -i s#'framelength\s*:.*'#'framelength: 2048'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 1023'# $acoustic_config_file
+ else
+ $SED -i s#'framelength\s*:.*'#'framelength: 4096'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 2047'# $acoustic_config_file
+ fi
+ $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.77'# $acoustic_config_file
+else
+ echo "This sampling frequency ($SamplingFreq) never tested before...please configure yourself!!"
+fi
+
+
+# [Architecture]
+if [[ "$Voice" == *"demo"* ]]
+then
+ $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $acoustic_config_file
+ $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $acoustic_config_file
+fi
+
+$SED -i s#'sequential_training.*:.*'#'sequential_training: True'# $acoustic_config_file
+$SED -i s#'learning_rate.*:.*'#'learning_rate: 0.002'# $acoustic_config_file
+$SED -i s#'training_epochs.*:.*'#'training_epochs: 50'# $acoustic_config_file
+
+
+# [Data]
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $acoustic_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $acoustic_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $acoustic_config_file
+
+
+echo "Acoustic configuration settings stored in $acoustic_config_file"
diff --git a/egs/roger_blizzard2008/s1/scripts/prepare_config_files_for_synthesis.sh b/egs/roger_blizzard2008/s1/scripts/prepare_config_files_for_synthesis.sh
new file mode 100755
index 00000000..d6f672ca
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/prepare_config_files_for_synthesis.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# Generate the Merlin duration/acoustic config files used at synthesis time from the sourced global settings file.
+if test "$#" -ne 1; then
+    echo "Usage: ./scripts/prepare_config_files_for_synthesis.sh conf/global_settings.cfg"
+    exit 1
+fi
+
+# Quote the path so a config location containing spaces is still handled.
+if [ ! -f "$1" ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+fi
+source "$1"
+
+# The sed calls below rely on GNU sed (-i without a suffix, \s), so macOS needs gsed.
+SED=sed
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    if ! command -v gsed > /dev/null; then
+        echo "You need to install GNU sed with 'brew install gnu-sed' on osX"
+        exit 1
+    fi
+    SED=gsed
+fi
+
+
+#########################################
+######## duration config file ###########
+#########################################
+
+duration_config_file=conf/test_dur_synth_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/duration_demo.conf $duration_config_file
+
+# ... and modify it:
+
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $duration_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $duration_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model'# $duration_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $duration_config_file
+$SED -i s#'test_id_list\s*:.*'#'test_id_list: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/test_id_list.scp'# $duration_config_file
+
+
+# [Labels]
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $duration_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $duration_config_file
+$SED -i s#'label_align\s*:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/prompt-lab'# $duration_config_file
+$SED -i s#'question_file_name\s*:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $duration_config_file
+
+
+# [Outputs]
+
+if [ "$Labels" == "state_align" ]
+then
+ $SED -i s#'dur\s*:.*'#'dur: 5'# $duration_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+ $SED -i s#'dur\s*:.*'#'dur: 1'# $duration_config_file
+else
+ echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+
+# [Waveform]
+
+$SED -i s#'test_synth_dir\s*:.*'#'test_synth_dir: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/gen-lab'# $duration_config_file
+
+
+# [Architecture]
+if [[ "$Voice" == *"demo"* ]]
+then
+ $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $duration_config_file
+ $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $duration_config_file
+fi
+
+
+# [Data]
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $duration_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $duration_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $duration_config_file
+
+
+# [Processes]
+
+$SED -i s#'DurationModel\s*:.*'#'DurationModel: True'# $duration_config_file
+$SED -i s#'GenTestList\s*:.*'#'GenTestList: True'# $duration_config_file
+
+$SED -i s#'NORMLAB\s*:.*'#'NORMLAB: True'# $duration_config_file
+
+$SED -i s#'MAKEDUR\s*:.*'#'MAKEDUR: False'# $duration_config_file
+$SED -i s#'MAKECMP\s*:.*'#'MAKECMP: False'# $duration_config_file
+$SED -i s#'NORMCMP\s*:.*'#'NORMCMP: False'# $duration_config_file
+$SED -i s#'TRAINDNN\s*:.*'#'TRAINDNN: False'# $duration_config_file
+$SED -i s#'CALMCD\s*:.*'#'CALMCD: False'# $duration_config_file
+
+$SED -i s#'DNNGEN\s*:.*'#'DNNGEN: True'# $duration_config_file
+
+echo "Duration configuration settings stored in $duration_config_file"
+
+
+
+#########################################
+######## acoustic config file ###########
+#########################################
+
+acoustic_config_file=conf/test_synth_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/acoustic_demo.conf $acoustic_config_file
+
+# ... and modify it:
+
+$SED -i s#'Merlin\s*:.*'#'Merlin: '$MerlinDir# $acoustic_config_file
+$SED -i s#'TOPLEVEL\s*:.*'#'TOPLEVEL: '${WorkDir}# $acoustic_config_file
+$SED -i s#'work\s*:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model'# $acoustic_config_file
+
+$SED -i s#'file_id_list\s*:.*'#'file_id_list: %(data)s/'${FileIDList}# $acoustic_config_file
+$SED -i s#'test_id_list\s*:.*'#'test_id_list: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/test_id_list.scp'# $acoustic_config_file
+
+
+# [Labels]
+
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $acoustic_config_file
+$SED -i s#'enforce_silence\s*:.*'#'enforce_silence: True'# $acoustic_config_file
+$SED -i s#'label_type\s*:.*'#'label_type: '${Labels}# $acoustic_config_file
+$SED -i s#'label_align\s*:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/gen-lab'# $acoustic_config_file
+$SED -i s#'question_file_name\s*:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $acoustic_config_file
+if [ "$Labels" == "state_align" ]
+then
+ $SED -i s#'subphone_feats\s*:.*'#'subphone_feats: full'# $acoustic_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+ $SED -i s#'subphone_feats\s*:.*'#'subphone_feats: coarse_coding'# $acoustic_config_file
+else
+ echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+
+# [Outputs]
+
+$SED -i s#'mgc\s*:.*'#'mgc: 60'# $acoustic_config_file
+$SED -i s#'dmgc\s*:.*'#'dmgc: 180'# $acoustic_config_file
+
+if [ "$Vocoder" == "STRAIGHT" ]
+then
+ $SED -i s#'bap\s*:.*'#'bap: 25'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 75'# $acoustic_config_file
+
+elif [ "$Vocoder" == "WORLD" ]
+then
+ if [ "$SamplingFreq" == "16000" ]
+ then
+ $SED -i s#'bap\s*:.*'#'bap: 1'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 3'# $acoustic_config_file
+ elif [ "$SamplingFreq" == "48000" ]
+ then
+ $SED -i s#'bap\s*:.*'#'bap: 5'# $acoustic_config_file
+ $SED -i s#'dbap\s*:.*'#'dbap: 15'# $acoustic_config_file
+ fi
+else
+ echo "This vocoder ($Vocoder) is not supported as of now...please configure yourself!!"
+fi
+
+$SED -i s#'lf0\s*:.*'#'lf0: 1'# $acoustic_config_file
+$SED -i s#'dlf0\s*:.*'#'dlf0: 3'# $acoustic_config_file
+
+
+# [Waveform] - output directory, vocoder type, and settings that depend on the sampling frequency
+
+$SED -i s#'test_synth_dir\s*:.*'#'test_synth_dir: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/wav'# $acoustic_config_file
+
+$SED -i s#'vocoder_type\s*:.*'#'vocoder_type: '${Vocoder}# $acoustic_config_file
+
+$SED -i s#'samplerate\s*:.*'#'samplerate: '${SamplingFreq}# $acoustic_config_file
+if [ "$SamplingFreq" == "16000" ]
+then
+ $SED -i s#'framelength\s*:.*'#'framelength: 1024'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 511'# $acoustic_config_file
+ $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.58'# $acoustic_config_file
+
+elif [ "$SamplingFreq" == "48000" ]
+then
+ if [ "$Vocoder" == "WORLD" ]
+ then
+ $SED -i s#'framelength\s*:.*'#'framelength: 2048'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 1023'# $acoustic_config_file
+ else
+ $SED -i s#'framelength\s*:.*'#'framelength: 4096'# $acoustic_config_file
+ $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 2047'# $acoustic_config_file
+ fi
+ $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.77'# $acoustic_config_file
+else
+ echo "This sampling frequency ($SamplingFreq) never tested before...please configure yourself!!"
+fi
+
+
+# [Architecture] - only voices whose name contains "demo" get the 4 x 512 TANH override
+if [[ "$Voice" == *"demo"* ]]
+then
+ $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $acoustic_config_file
+ $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $acoustic_config_file
+fi
+
+
+# [Data] - file counts come from Train/Valid/Test in the global settings
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $acoustic_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $acoustic_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $acoustic_config_file
+
+
+# [Processes] - MAKECMP, NORMCMP, TRAINDNN and CALMCD are set to False for synthesis
+
+$SED -i s#'AcousticModel\s*:.*'#'AcousticModel: True'# $acoustic_config_file
+$SED -i s#'GenTestList\s*:.*'#'GenTestList: True'# $acoustic_config_file
+
+$SED -i s#'MAKECMP\s*:.*'#'MAKECMP: False'# $acoustic_config_file
+$SED -i s#'NORMCMP\s*:.*'#'NORMCMP: False'# $acoustic_config_file
+$SED -i s#'TRAINDNN\s*:.*'#'TRAINDNN: False'# $acoustic_config_file
+$SED -i s#'CALMCD\s*:.*'#'CALMCD: False'# $acoustic_config_file
+
+
+echo "Acoustic configuration settings stored in $acoustic_config_file"
diff --git a/egs/roger_blizzard2008/s1/scripts/prepare_labels_from_txt.sh b/egs/roger_blizzard2008/s1/scripts/prepare_labels_from_txt.sh
new file mode 100755
index 00000000..0a35754a
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/prepare_labels_from_txt.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Build Merlin label files from a text prompt file using the Festival-based front-end scripts.
+if test "$#" -lt 3; then
+    echo "Usage: ./scripts/prepare_labels_from_txt.sh <inp_txt> <lab_dir> <global_config_file> [train]"
+    exit 1
+fi
+
+### arguments
+inp_txt=$1
+lab_dir=$2
+global_config_file=$3
+
+if [ ! -f "$global_config_file" ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+fi
+source "$global_config_file"
+
+### optional 4th argument: "true" selects the training branches below; it defaults to false
+if test "$#" -eq 3; then
+    train=false
+else
+    train=$4
+fi
+
+### tools required: FESTDIR must be an existing directory (the message points at the global config for setting it)
+if [ ! -d "${FESTDIR}" ]; then
+ echo "Please configure festival path in $global_config_file !!"
+ exit 1
+fi
+
+### define a few variables here; the id-list and scheme file names depend on train vs. test mode
+frontend=${MerlinDir}/misc/scripts/frontend
+out_dir=$lab_dir
+
+if [ "$train" = true ]; then
+ file_id_scp=file_id_list.scp
+ scheme_file=train_sentences.scm
+else
+ file_id_scp=test_id_list.scp
+ scheme_file=new_test_sentences.scm
+fi
+
+### generate a scheme file (and the file id list) from the input text
+python ${frontend}/utils/genScmFile.py \
+ ${inp_txt} \
+ ${out_dir}/prompt-utt \
+ ${out_dir}/$scheme_file \
+ ${out_dir}/$file_id_scp
+
+### generate utt from scheme file by running festival in batch mode (-b)
+echo "generating utts from scheme file"
+${FESTDIR}/bin/festival -b ${out_dir}/$scheme_file
+
+### convert festival utt to lab (prompt-utt -> prompt-lab)
+echo "converting festival utts to labels..."
+${frontend}/festival_utt_to_lab/make_labels \
+ ${out_dir}/prompt-lab \
+ ${out_dir}/prompt-utt \
+ ${FESTDIR}/examples/dumpfeats \
+ ${frontend}/festival_utt_to_lab
+
+### normalize lab for merlin with options: state_align or phone_align
+### (the train branch always passes phone_align and writes to label_no_align; the test branch passes ${Labels})
+echo "normalizing label files for merlin..."
+if [ "$train" = true ]; then
+ python ${frontend}/utils/normalize_lab_for_merlin.py \
+ ${out_dir}/prompt-lab/full \
+ ${out_dir}/label_no_align \
+ phone_align \
+ ${out_dir}/$file_id_scp 0
+ ### remove the whole intermediate prompt-lab directory
+ rm -rf ${out_dir}/prompt-lab
+else
+ python ${frontend}/utils/normalize_lab_for_merlin.py \
+ ${out_dir}/prompt-lab/full \
+ ${out_dir}/prompt-lab \
+ ${Labels} \
+ ${out_dir}/$file_id_scp
+ ### remove only the full/mono/tmp subdirectories; the normalized labels stay in prompt-lab
+ rm -rf ${out_dir}/prompt-lab/{full,mono,tmp}
+
+ echo "Labels are ready in: ${out_dir}/prompt-lab !!"
+fi
+
+
diff --git a/egs/roger_blizzard2008/s1/scripts/queue.pl b/egs/roger_blizzard2008/s1/scripts/queue.pl
new file mode 100755
index 00000000..eb9ef3de
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/queue.pl
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
+# Apache 2.0.
+use File::Basename;
+use Cwd;
+
+# queue.pl has the same functionality as run.pl, except that
+# it runs the job in question on the queue (Sun GridEngine).
+# This version of queue.pl uses the task array functionality
+# of the grid engine. Note: it's different from the queue.pl
+# in the s4 and earlier scripts.
+
+$qsub_opts = "";
+$sync = 0;
+
+# Up to three passes over the leading arguments, so that the JOB=1:n option
+# may be interleaved with the options that are passed through to qsub.
+for ($x = 1; $x <= 3; $x++) {
+    while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
+        $switch = shift @ARGV;
+        if ($switch eq "-V") { # -V is the only switch consumed without a value
+            $qsub_opts .= "-V ";
+        } else {
+            $option = shift @ARGV;
+            if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+                $sync = 1; # with -sync y the sync-file wait loop further down is skipped
+            }
+            $qsub_opts .= "$switch $option ";
+            if ($switch eq "-pe") { # e.g. -pe smp 5
+                $option2 = shift @ARGV;
+                $qsub_opts .= "$option2 ";
+            }
+        }
+    }
+    if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10.
+        $jobname = $1;
+        $jobstart = $2;
+        $jobend = $3;
+        if ($jobstart > $jobend) { # validate before shifting, so the message names the bad spec
+            die "queue.pl: invalid job range $ARGV[0]";
+        }
+        shift;
+    } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+        $jobname = $1;
+        $jobstart = $2;
+        $jobend = $2;
+        shift;
+    } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+        print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n";
+    }
+}
+
+
+if (@ARGV < 2) { # after option parsing we still need a log-file and a command
+ print STDERR
+ "Usage: queue.pl [options to qsub] [JOB=1:n] log-file command-line arguments...\n" .
+ "e.g.: queue.pl foo.log echo baz\n" .
+ " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" .
+ "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
+ " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" .
+ "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
+ " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" .
+ " another string other than JOB)\n" .
+ "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" .
+ "and change its behavior. Otherwise it uses qstat to work out when the job finished\n";
+ exit 1;
+}
+
+$cwd = getcwd(); # the generated job script starts with "cd $cwd"
+$logfile = shift @ARGV; # first remaining argument is the log file; the rest is the command
+
+if (defined $jobname && $logfile !~ m/$jobname/
+ && $jobend > $jobstart) { # several tasks, but the log-file name does not contain the job identifier
+ print STDERR "queue.pl: you are trying to run a parallel job but "
+ . "you are putting the output into just one log file ($logfile)\n";
+ exit(1);
+}
+
+#
+# Work out the command; quote escaping is done here.
+# Note: the rules for escaping stuff are worked out pretty
+# arbitrarily, based on what we want it to do. Some things that
+# we pass as arguments to queue.pl, such as "|", we want to be
+# interpreted by bash, so we don't escape them. Other things,
+# such as archive specifiers like 'ark:gunzip -c foo.gz|', we want
+# to be passed, in quotes, to the Kaldi program. Our heuristic
+# is that stuff with whitespace in should be quoted. This doesn't
+# always work.
+#
+$cmd = "";
+
+foreach $x (@ARGV) {
+ if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # If string contains no whitespace, take
+ # as-is.
+ elsif ($x =~ m:\":) { $cmd .= "'\''$x'\'' "; } # else if it contains a dbl-quote, wrap in single quotes
+ else { $cmd .= "\"$x\" "; } # else use double.
+}
+
+#
+# Work out the location of the script file and of the q/ directory that
+# receives qsub's own log, creating the directories if necessary.
+$dir = dirname($logfile);
+$base = basename($logfile);
+$qdir = "$dir/q";
+$qdir =~ s:/(log|LOG)/*q:/q:; # If qdir ends in .../log/q, make it just .../q.
+$queue_logfile = "$qdir/$base";
+
+# mkdir -p (as run.pl uses) also creates missing parents; another job may be creating the directory too.
+if (!-d $dir) { system "mkdir -p $dir 2>/dev/null"; }
+if (!-d $dir) { die "Cannot make the directory $dir\n"; }
+# make a directory called "q", where we will put the log created by qsub...
+# normally this doesn't contain anything interesting, everything goes to $logfile.
+if (! -d "$qdir") {
+    system "mkdir -p $qdir 2>/dev/null";
+    sleep(5); ## This is to fix an issue we encountered in denominator lattice creation,
+    ## where if e.g. the exp/tri2b_denlats/log/15/q directory had just been
+    ## created and the job immediately ran, it would die with an error because nfs
+    ## had not yet synced. I'm also decreasing the acdirmin and acdirmax in our
+    ## NFS settings to something like 5 seconds.
+}
+
+if (defined $jobname) { # It's an array job.
+ $queue_array_opt = "-t $jobstart:$jobend";
+ $logfile =~ s/$jobname/\$SGE_TASK_ID/g; # This variable will get
+ # replaced by qsub, in each job, with the job-id.
+ $cmd =~ s/$jobname/\$SGE_TASK_ID/g; # same for the command...
+ $queue_logfile =~ s/\.?$jobname//; # the log file in the q/ subdirectory
+ # is for the queue to put its log, and this doesn't need the task array subscript
+ # so we remove it (first occurrence only, together with a preceding dot if present).
+}
+
+# queue_scriptfile is as $queue_logfile [e.g. dir/q/foo.log] but
+# with the suffix .sh.
+$queue_scriptfile = $queue_logfile;
+($queue_scriptfile =~ s/\.[a-zA-Z]{1,5}$/.sh/) || ($queue_scriptfile .= ".sh"); # swap a 1-5 letter extension for .sh, else append .sh
+if ($queue_scriptfile !~ m:^/:) { # relative path: prefix the current directory
+ $queue_scriptfile = $cwd . "/" . $queue_scriptfile; # just in case.
+}
+
+# Write the job as a bash script ($queue_scriptfile) and submit that file with
+# qsub.  "-v PATH" on the qsub line keeps our current PATH around, in case there
+# is something in it that we need (the job also sources ./path.sh).
+# The sync-file is touched by the job script once the command has finished.
+
+$syncfile = "$qdir/done.$$";
+
+system("rm $queue_logfile $syncfile 2>/dev/null");
+
+# Build the qsub command first, so that it can be recorded at the end of the script.
+$qsub_cmd = "qsub -S /bin/bash -v PATH -cwd -j y -o $queue_logfile $qsub_opts $queue_array_opt $queue_scriptfile >>$queue_logfile 2>&1";
+open(Q, ">", $queue_scriptfile) || die "Failed to write to $queue_scriptfile";
+
+print Q "#!/bin/bash\n";
+print Q "cd $cwd\n";
+print Q ". ./path.sh\n";
+print Q "( echo '#' Running on \`hostname\`\n";
+print Q "  echo '#' Started at \`date\`\n";
+print Q "  echo -n '# '; cat <<EOF\n$cmd\nEOF\n) >$logfile\n"; # here-doc copies the command into the log header; ")" closes the header subshell
+print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
+print Q "ret=\$?\n";
+print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
+print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;
+  # let the script return with status 100 which will put it to E state; more easily rerunnable.
+if (!defined $jobname) { # not an array job
+    print Q "touch $syncfile\n"; # so we know it's done.
+} else {
+    print Q "touch $syncfile.\$SGE_TASK_ID\n"; # touch a bunch of sync-files.
+}
+print Q "exit \$[\$ret ? 1 : 0]\n"; # avoid status 100 which grid-engine
+print Q "## submitted with:\n";       # treats specially.
+print Q "# $qsub_cmd\n";
+if (!close(Q)) { # buffered write errors only surface when the handle is closed
+    die "Failed to close the script file (full disk?)";
+}
+
+$ret = system ($qsub_cmd);
+if ($ret != 0) {
+    if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
+        if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+        print STDERR "queue.pl: job writing to $logfile failed\n";
+    } else {
+        print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+        print STDERR `tail $queue_logfile`;
+    }
+    exit(1);
+}
+
+if (! $sync) { # We're not submitting with -sync y, so we
+    # need to wait for the jobs to finish. We wait for the
+    # sync-files we "touched" in the script to exist.
+    @syncfiles = ();
+    if (!defined $jobname) { # not an array job.
+        push @syncfiles, $syncfile;
+    } else {
+        for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+            push @syncfiles, "$syncfile.$jobid";
+        }
+    }
+    # We will need the sge_job_id, to check that job still exists
+    $sge_job_id=`grep "Your job" $queue_logfile | awk '{ print \$3 }' | sed 's|\\\..*||'`;
+    chomp($sge_job_id);
+    $check_sge_job_ctr=1;
+    # Poll with a delay that starts at 0.1s, grows by 20% per check and is capped at 3s.
+    $wait = 0.1;
+    foreach $f (@syncfiles) {
+        # wait for them to finish one by one.
+        while (! -f $f) {
+            select(undef, undef, undef, $wait); # sleep() would truncate fractional seconds to 0
+            $wait *= 1.2;
+            if ($wait > 3.0) {
+                $wait = 3.0; # never wait more than 3 seconds.
+                if (rand() > 0.5) {
+                    system("touch $qdir/.kick");
+                } else {
+                    system("rm $qdir/.kick 2>/dev/null");
+                }
+                # This seems to kick NFS in the teeth to cause it to refresh the
+                # directory. I've seen cases where it would indefinitely fail to get
+                # updated, even though the file exists on the server.
+                system("ls $qdir >/dev/null");
+            }
+
+            # Check that the job exists in SGE. Job can be killed if duration
+            # exceeds some hard limit, or in case of a machine shutdown.
+            if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
+                if ( -f $f ) { next; }; #syncfile appeared, ok
+                $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
+                if($ret != 0) {
+                    # Don't consider immediately missing job as error, first wait some
+                    # time to make sure it is not just delayed creation of the syncfile.
+                    sleep(3);
+                    if ( -f $f ) { next; }; #syncfile appeared, ok
+                    sleep(7);
+                    if ( -f $f ) { next; }; #syncfile appeared, ok
+                    sleep(20);
+                    if ( -f $f ) { next; }; #syncfile appeared, ok
+                    #Otherwise it is an error
+                    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+                    print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
+                    print STDERR " Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
+                    exit(1);
+                }
+            }
+        }
+    }
+    $all_syncfiles = join(" ", @syncfiles);
+    system("rm $all_syncfiles 2>/dev/null");
+}
+
+# OK, at this point we are synced; we know the job is done.
+# But we don't know about its exit status. We'll look at $logfile for this.
+# First work out an array @logfiles of file-locations we need to
+# read (just one, unless it's an array job).
+@logfiles = ();
+if (!defined $jobname) { # not an array job.
+    push @logfiles, $logfile;
+} else {
+    for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+        $l = $logfile;
+        $l =~ s/\$SGE_TASK_ID/$jobid/g;
+        push @logfiles, $l;
+    }
+}
+
+$num_failed = 0;
+foreach $l (@logfiles) {
+    @wait_times = (0.1, 0.2, 0.2, 0.3, 0.5, 0.5, 1.0, 2.0, 5.0, 5.0, 5.0, 10.0, 25.0);
+    for ($iter = 0; $iter <= @wait_times; $iter++) { # one extra pass: the last one reports the failure
+        $line = `tail -10 $l 2>/dev/null`; # Note: although this line should be the last
+        # line of the file, I've seen cases where it was not quite the last line because
+        # of delayed output by the process that was running, or processes it had called.
+        # so tail -10 gives it a little leeway.
+        if ($line =~ m/with status (\d+)/) {
+            $status = $1;
+            last;
+        } else {
+            if ($iter < @wait_times) {
+                select(undef, undef, undef, $wait_times[$iter]); # sleep() would truncate the fractional waits to 0
+            } else {
+                if (! -f $l) {
+                    print STDERR "Log-file $l does not exist.\n";
+                } else {
+                    print STDERR "The last line of log-file $l does not seem to indicate the "
+                        . "return status as expected\n";
+                }
+                exit(1); # Something went wrong with the queue, or the
+                # machine it was running on, probably.
+            }
+        }
+    }
+    # OK, now we have $status, which is the return-status of
+    # the command in the job.
+    if ($status != 0) { $num_failed++; }
+}
+if ($num_failed == 0) { exit(0); }
+else { # we failed.
+    if (@logfiles == 1) {
+        if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/$jobstart/g; }
+        print STDERR "queue.pl: job failed with status $status, log is in $logfile\n";
+        if ($logfile =~ m/JOB/) {
+            print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+        }
+    } else {
+        if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+        $numjobs = 1 + $jobend - $jobstart;
+        print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n";
+    }
+    exit(1);
+}
diff --git a/egs/roger_blizzard2008/s1/scripts/remove_intermediate_files.sh b/egs/roger_blizzard2008/s1/scripts/remove_intermediate_files.sh
new file mode 100755
index 00000000..b68e8046
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/remove_intermediate_files.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Delete intermediate synthesis outputs: everything but *.lab in gen-lab, and everything but *.wav in wav.
+if test "$#" -ne 1; then
+    echo "Usage: ./scripts/remove_intermediate_files.sh conf/global_settings.cfg"
+    exit 1
+fi
+
+if [ ! -f "$1" ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+else
+    source "$1"
+fi
+
+###################################################
+######## remove intermediate synth files ##########
+###################################################
+
+current_working_dir=$(pwd)
+
+synthesis_dir=${WorkDir}/experiments/${Voice}/test_synthesis
+gen_lab_dir=${synthesis_dir}/gen-lab
+gen_wav_dir=${synthesis_dir}/wav
+
+shopt -s extglob # enables the !(pattern) globs used below
+
+if [ -d "$gen_lab_dir" ]; then
+    # Delete only after the cd succeeded, so a failed cd cannot wipe files elsewhere.
+    cd "${gen_lab_dir}" && rm -f *.!(lab)
+fi
+
+if [ -d "$gen_wav_dir" ]; then
+    # Delete only after the cd succeeded, so a failed cd cannot wipe files elsewhere.
+    cd "${gen_wav_dir}" \
+        && rm -f weight *.!(wav)
+fi
+
+cd "${current_working_dir}"
diff --git a/egs/roger_blizzard2008/s1/scripts/run.pl b/egs/roger_blizzard2008/s1/scripts/run.pl
new file mode 100755
index 00000000..1750bc50
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/run.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+
+# In general, doing
+# run.pl some.log a b c is like running the command a b c in
+# the bash shell, and putting the standard error and output into some.log.
+# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
+# run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
+# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
+# If any of the jobs fails, this script will fail.
+
+# A typical example is:
+# run.pl some.log my-prog "--opt=foo bar" foo \| other-prog baz
+# and run.pl will run something like:
+# ( my-prog '--opt=foo bar' foo | other-prog baz ) >& some.log
+#
+# Basically it takes the command-line arguments, quotes them
+# as necessary to preserve spaces, and evaluates them with bash.
+# In addition it puts the command line at the top of the log, and
+# the start and end times of the command at the beginning and end.
+# The reason why this is useful is so that we can create a different
+# version of this program that uses a queueing system instead.
+
+@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
+
+$jobstart=1;
+$jobend=1;
+$qsub_opts=""; # These will be ignored.
+
+# First parse an option like JOB=1:4, and any
+# options that would normally be given to
+# queue.pl, which we will just discard.
+
+if (@ARGV > 0) {
+    while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { # parse any options
+        # that would normally go to qsub, but which will be ignored here.
+        $switch = shift @ARGV;
+        if ($switch eq "-V") {
+            $qsub_opts .= "-V ";
+        } else {
+            $option = shift @ARGV;
+            if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+                $qsub_opts .= "-sync "; # Note: in the
+                # corresponding code in queue.pl it says instead, just "$sync = 1;".
+            }
+            $qsub_opts .= "$switch $option ";
+            if ($switch eq "-pe") { # e.g. -pe smp 5
+                $option2 = shift @ARGV;
+                $qsub_opts .= "$option2 ";
+            }
+        }
+    }
+    if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:4.
+        $jobname = $1;
+        $jobstart = $2;
+        $jobend = $3;
+        if ($jobstart > $jobend) { # validate before shifting, so the message names the bad spec
+            die "run.pl: invalid job range $ARGV[0]";
+        }
+        shift;
+    } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+        $jobname = $1;
+        $jobstart = $2;
+        $jobend = $2;
+        shift;
+    } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+        print STDERR "Warning: suspicious first argument to run.pl: $ARGV[0]\n";
+    }
+}
+
+if ($qsub_opts ne "") {
+    print STDERR "Warning: run.pl ignoring options \"$qsub_opts\"\n";
+}
+
+$logfile = shift @ARGV; # first remaining argument is the log file; the rest is the command
+
+if (defined $jobname && $logfile !~ m/$jobname/ &&
+ $jobend > $jobstart) { # several jobs, but the log-file name does not contain the job identifier
+ print STDERR "run.pl: you are trying to run a parallel job but "
+ . "you are putting the output into just one log file ($logfile)\n";
+ exit(1);
+}
+
+$cmd = "";
+
+foreach $x (@ARGV) {
+ if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # no whitespace: take the argument as-is
+ elsif ($x =~ m:\":) { $cmd .= "'$x' "; } # contains a double-quote: wrap in single quotes
+ else { $cmd .= "\"$x\" "; } # otherwise wrap in double quotes
+}
+
+
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { # fork one child per job id; the parent waits for them afterwards
+ $childpid = fork();
+ if (!defined $childpid) { die "Error forking in run.pl (writing to $logfile)"; }
+ if ($childpid == 0) { # We're in the child... this branch
+ # executes the job and returns (possibly with an error status).
+ if (defined $jobname) { # substitute this child's job id for the job identifier
+ $cmd =~ s/$jobname/$jobid/g;
+ $logfile =~ s/$jobname/$jobid/g;
+ }
+ system("mkdir -p `dirname $logfile` 2>/dev/null");
+ open(F, ">$logfile") || die "Error opening log file $logfile";
+ print F "# " . $cmd . "\n"; # log header: the command line, then the start time
+ print F "# Started at " . `date`;
+ $starttime = `date +'%s'`;
+ print F "#\n";
+ close(F);
+
+ # Pipe into bash.. make sure we're not using any other shell.
+ open(B, "|bash") || die "Error opening shell command";
+ print B "( " . $cmd . ") 2>>$logfile >> $logfile"; # stdout and stderr are both appended to the log
+ close(B); # If there was an error, exit status is in $?
+ $ret = $?;
+
+ $endtime = `date +'%s'`;
+ open(F, ">>$logfile") || die "Error opening log file $logfile (again)";
+ $enddate = `date`;
+ chop $enddate; # drop the last character of the date output (its trailing newline)
+ print F "# Ended (code $ret) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
+ close(F);
+ exit($ret == 0 ? 0 : 1); # collapse any non-zero status to 1
+ }
+}
+
+$ret = 0;
+$numfail = 0;
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { # reap one child per forked job
+    $r = wait();
+    if ($r == -1) { die "Error waiting for child process"; } # should never happen.
+    if ($? != 0) { $numfail++; $ret = 1; } # The child process failed.
+}
+
+if ($ret != 0) {
+    $njobs = $jobend - $jobstart + 1;
+    if ($njobs == 1) {
+        # The children substituted the job id in their own copy only; do it here too (as queue.pl does).
+        if (defined $jobname) { $logfile =~ s/$jobname/$jobstart/g; }
+        print STDERR "run.pl: job failed, log is in $logfile\n";
+        if ($logfile =~ m/JOB/) {
+            print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+        }
+    }
+    else {
+        $logfile =~ s/$jobname/*/g;
+        print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
+    }
+}
+exit ($ret);
diff --git a/egs/roger_blizzard2008/s1/scripts/run_phone_aligner.sh b/egs/roger_blizzard2008/s1/scripts/run_phone_aligner.sh
new file mode 100755
index 00000000..957be2d4
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/run_phone_aligner.sh
@@ -0,0 +1,98 @@
+ #!/bin/bash -e
+
+ # Create phone-aligned labels for Merlin using the festvox clustergen (ehmm) tools.
+ #
+ # Arguments (all four are required):
+ #   $1 wav_dir            - directory of .wav files, relative to $WorkDir
+ #   $2 inp_txt            - text file or directory of text files, relative to $WorkDir
+ #   $3 lab_dir            - directory in which the build and the labels are created
+ #   $4 global_config_file - settings file sourced below; presumably it defines MerlinDir,
+ #                           WorkDir, Voice, ESTDIR, FESTDIR and FESTVOXDIR (not set here)
+
+ if test "$#" -ne 4; then
+     echo "Usage: ./run_phone_aligner.sh WAV_DIR INP_TXT LAB_DIR GLOBAL_CONFIG_FILE"
+     exit 1
+ fi
+
+ ### Arguments
+ wav_dir=$1
+ inp_txt=$2
+ lab_dir=$3
+ global_config_file=$4
+
+ ### Use paths from global config file
+ source "$global_config_file"
+
+ ### frontend scripts
+ frontend=${MerlinDir}/misc/scripts/frontend
+
+ #################################################################
+ ##### Create training labels for merlin with festvox tools ######
+ #################################################################
+
+ ### tools required
+
+ if [[ ! -d "${ESTDIR}" ]] || [[ ! -d "${FESTDIR}" ]] || [[ ! -d "${FESTVOXDIR}" ]]; then
+     echo "Please configure paths to speech_tools, festival and festvox in config.cfg !!"
+     exit 1
+ fi
+
+ ### do forced alignment using ehmm in clustergen setup
+ voice_dir=cmu_us_${Voice}
+ mkdir -p "$lab_dir"
+ cd "$lab_dir"
+ mkdir "$voice_dir"
+ cd "$voice_dir"
+
+ "$FESTVOXDIR"/src/clustergen/setup_cg cmu us ${Voice}
+
+ # The transcriptions may be one ready-made file or a directory that still has to be converted.
+ txt_src=${WorkDir}/${inp_txt}
+
+ if [ -f "${txt_src}" ]; then
+     cp "${txt_src}" etc/txt.done.data
+ elif [ -d "${txt_src}" ]; then
+     python "${frontend}/utils/prepare_txt_done_data_file.py" "${txt_src}" etc/txt.done.data
+ else
+     echo "Please check ${inp_txt} !!"
+     exit 1
+ fi
+
+ cp "$WorkDir/$wav_dir"/*.wav wav/
+
+ ./bin/do_build build_prompts
+ ./bin/do_build label
+ ./bin/do_build build_utts
+
+ cd ../
+
+ ### convert festival utts to lab
+
+ cut -d " " -f 2 "${voice_dir}/etc/txt.done.data" > file_id_list.scp
+
+ echo "converting festival utts to labels..."
+ # Read the utterances from the voice built above rather than from a hard-coded voice name.
+ "${frontend}/festival_utt_to_lab/make_labels" \
+     full-context-labels \
+     "${voice_dir}/festival/utts" \
+     "${FESTDIR}/examples/dumpfeats" \
+     "${frontend}/festival_utt_to_lab"
+
+ echo "normalizing label files for merlin..."
+ python "${frontend}/utils/normalize_lab_for_merlin.py" \
+     full-context-labels/full \
+     label_phone_align \
+     phone_align \
+     file_id_list.scp
+
+ ### return to working directory
+ cd "${WorkDir}"
+
+ phone_labels=$lab_dir/label_phone_align
+
+ # An empty (or missing) label directory means the alignment produced nothing.
+ if [ ! "$(ls -A "${phone_labels}")" ]; then
+     echo "Force-alignment unsuccessful!!"
+ else
+     echo "You should have your labels ready in: $phone_labels !!"
+ fi
diff --git a/egs/roger_blizzard2008/s1/scripts/run_state_aligner.sh b/egs/roger_blizzard2008/s1/scripts/run_state_aligner.sh
new file mode 100755
index 00000000..12b9bb00
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/run_state_aligner.sh
@@ -0,0 +1,71 @@
+ #!/bin/bash
+
+ # Create state-aligned labels for Merlin: full-context labels are prepared by
+ # prepare_labels_from_txt.sh and then force-aligned by forced_alignment.py using HTK.
+ #
+ # Arguments (all four are required):
+ #   $1 wav_dir            - directory of .wav files, relative to $WorkDir
+ #   $2 inp_txt            - text input handed on to prepare_labels_from_txt.sh
+ #   $3 lab_dir            - label directory, relative to $WorkDir
+ #   $4 global_config_file - settings file sourced below; presumably it defines MerlinDir,
+ #                           WorkDir and HTKDIR (they are not set in this script)
+
+ if test "$#" -ne 4; then
+     echo "Usage: ./run_state_aligner.sh WAV_DIR INP_TXT LAB_DIR GLOBAL_CONFIG_FILE"
+     exit 1
+ fi
+
+ ### Arguments
+ wav_dir=$1
+ inp_txt=$2
+ lab_dir=$3
+ global_config_file=$4
+
+ ### Use paths from global config file
+ source "$global_config_file"
+
+ ### force-alignment scripts
+ aligner=${MerlinDir}/misc/scripts/alignment/state_align
+
+ # initializations
+ train=true
+
+ ####################################
+ ######## prepare labels ############
+ ####################################
+
+ ### do prepare full-contextual labels without timestamps
+ echo "preparing full-contextual labels using Festival frontend..."
+ bash "${WorkDir}/scripts/prepare_labels_from_txt.sh" "$inp_txt" "$lab_dir" "$global_config_file" "$train"
+ status_prev_step=$?
+
+ # Any non-zero status means the previous step failed, not only status 1.
+ if [ "$status_prev_step" -ne 0 ]; then
+     echo "Preparation of full-contextual labels unsuccessful!!"
+     echo "Please check scripts/prepare_labels_from_txt.sh"
+     exit 1
+ fi
+
+ ### tools required
+ if [[ ! -d "${HTKDIR}" ]]; then
+     echo "Please configure path to HTK tools in $global_config_file !!"
+     exit 1
+ fi
+
+ ### do forced alignment using HVite
+ echo "forced-alignment using HTK tools..."
+
+ # The settings are patched into forced_alignment.py itself before it is run.
+ sed -i "s#HTKDIR =.*#HTKDIR = \"$HTKDIR\"#" "$aligner/forced_alignment.py"
+ sed -i "s#work_dir =.*#work_dir = \"$WorkDir/$lab_dir\"#" "$aligner/forced_alignment.py"
+ sed -i "s#wav_dir =.*#wav_dir = \"$WorkDir/$wav_dir\"#" "$aligner/forced_alignment.py"
+
+ python "$aligner/forced_alignment.py"
+
+ state_labels=$lab_dir/label_state_align
+
+ if [ ! "$(ls -A "${state_labels}")" ]; then
+     echo "Force-alignment unsuccessful!! Please check $aligner/forced_alignment.py"
+ else
+     echo "You should have your labels ready in: $state_labels !!"
+ fi
diff --git a/egs/roger_blizzard2008/s1/scripts/submit.sh b/egs/roger_blizzard2008/s1/scripts/submit.sh
new file mode 100755
index 00000000..f0500afd
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/submit.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+
+## Generic script for submitting any Theano job to GPU
+# usage: submit.sh [scriptname.py script_arguments ... ]
+
+src_dir=$(dirname $1)
+
+# Source install-related environment variables
+source ${src_dir}/setup_env.sh
+
+# Try to lock a GPU...
+gpu_id=$(python ${src_dir}/gpu_lock.py --id-to-hog)
+
+# Run the input command (run_merlin.py) with its arguments
+if [ $gpu_id -gt -1 ]; then
+ echo "Running on GPU id=$gpu_id ..."
+ THEANO_FLAGS="mode=FAST_RUN,device=gpu$gpu_id,"$MERLIN_THEANO_FLAGS
+ export THEANO_FLAGS
+
+{ # try
+ python $@
+ python ${src_dir}/gpu_lock.py --free $gpu_id
+} || { # catch
+ python ${src_dir}/gpu_lock.py --free $gpu_id
+}
+else
+ echo "No GPU is available! Running on CPU..."
+
+ THEANO_FLAGS=$MERLIN_THEANO_FLAGS
+ export THEANO_FLAGS
+
+ python $@
+fi
diff --git a/egs/roger_blizzard2008/s1/scripts/test_nan.sh b/egs/roger_blizzard2008/s1/scripts/test_nan.sh
new file mode 100755
index 00000000..41a52768
--- /dev/null
+++ b/egs/roger_blizzard2008/s1/scripts/test_nan.sh
@@ -0,0 +1,40 @@
+ #!/bin/bash
+
+ # Report the feature files that contain NaN values.
+ #
+ # Arguments:
+ #   $1 - directory holding the feature files
+ #   $2 - text file with one file name (without extension) per line
+ #   $3 - extension appended to every name
+ #
+ # Each file is converted to text with x2x (+fa); for every file with "nan" lines,
+ # its name and the number of such lines are printed.
+
+ if test "$#" -ne 3; then
+     echo "bash scripts/test_nan.sh FEATURE_DIR FILE_ID_LIST EXT"
+     exit 1
+ fi
+
+ global_config_file="conf/global_settings.cfg"
+ if [ ! -f $global_config_file ]; then
+     echo "Global config file doesn't exist"
+     exit 1
+ else
+     source $global_config_file
+ fi
+
+ x2x=${MerlinDir}/tools/bin/SPTK-3.9/x2x
+
+ ext=$3
+
+ # IFS is emptied for the read only, so that each line is taken verbatim without
+ # changing word splitting elsewhere; -r keeps backslashes literal.
+ while IFS='' read -r sentence
+ do
+     nlines=$("$x2x" +fa "$1/$sentence$ext" | grep -c "nan")
+     if test "$nlines" -gt 0
+     then
+         echo "$sentence"
+         echo "$nlines"
+     fi
+ done < "$2"
diff --git a/egs/slt_arctic/s1/03_train_duration_model.sh b/egs/slt_arctic/s1/03_train_duration_model.sh
index 1b2411fe..3c657d70 100755
--- a/egs/slt_arctic/s1/03_train_duration_model.sh
+++ b/egs/slt_arctic/s1/03_train_duration_model.sh
@@ -18,6 +18,6 @@ duration_conf_file=$1
### Step 3: train duration model ###
echo "Step 3:"
echo "training duration model..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $duration_conf_file
+./scripts/${cuda_cmd} "experiments/${Voice}/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"
diff --git a/egs/slt_arctic/s1/04_train_acoustic_model.sh b/egs/slt_arctic/s1/04_train_acoustic_model.sh
index 59e21d38..7a085b8e 100755
--- a/egs/slt_arctic/s1/04_train_acoustic_model.sh
+++ b/egs/slt_arctic/s1/04_train_acoustic_model.sh
@@ -18,6 +18,6 @@ acoustic_conf_file=$1
### Step 4: train acoustic model ###
echo "Step 4:"
echo "training acoustic model..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $acoustic_conf_file
+./scripts/${cuda_cmd} "experiments/${Voice}/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"
diff --git a/egs/slt_arctic/s1/05_run_merlin.sh b/egs/slt_arctic/s1/05_run_merlin.sh
index c7745e23..38838403 100755
--- a/egs/slt_arctic/s1/05_run_merlin.sh
+++ b/egs/slt_arctic/s1/05_run_merlin.sh
@@ -22,10 +22,10 @@ test_synth_config_file=$2
echo "Step 5:"
echo "synthesizing durations..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $test_dur_config_file
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_dur_config_file"
echo "synthesizing speech..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $test_synth_config_file
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_synth_config_file"
echo "deleting intermediate synthesis files..."
./scripts/remove_intermediate_files.sh $global_config_file
diff --git a/egs/slt_arctic/s1/cmd.sh b/egs/slt_arctic/s1/cmd.sh
new file mode 100755
index 00000000..158e72c8
--- /dev/null
+++ b/egs/slt_arctic/s1/cmd.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# "queue.pl" uses qsub. The options to it are
+# options to qsub. If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+ # the number of cpus on your machine).
+
+#a) Sun grid options (IDIAP)
+# ATTENTION: Do that in your shell: SETSHELL grid
+#export cuda_cmd="queue.pl -l gpu"
+#export cuda_short_cmd="queue.pl -l sgpu"
+#export cuda_cmd="queue.pl -l q1d,hostname=dynamix03"
+#export cuda_cmd="..."
+
+#b) BUT cluster options
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+
+#c) run it locally...
+export cuda_cmd=run.pl
+export cuda_short_cmd=$cuda_cmd
diff --git a/egs/slt_arctic/s1/merlin_synthesis.sh b/egs/slt_arctic/s1/merlin_synthesis.sh
index f24b7b27..74c54b2d 100755
--- a/egs/slt_arctic/s1/merlin_synthesis.sh
+++ b/egs/slt_arctic/s1/merlin_synthesis.sh
@@ -1,5 +1,7 @@
#!/bin/bash
+source cmd.sh
+
if test "$#" -ne 0; then
echo "Usage: ./merlin_synthesis.sh"
exit 1
@@ -39,8 +41,8 @@ fi
### Step 2: synthesize speech ###
echo "Step 2: synthesizing speech..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py conf/test_dur_synth_${Voice}.conf
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py conf/test_synth_${Voice}.conf
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "conf/test_dur_synth_${Voice}.conf"
+./scripts/${cuda_short_cmd} "experiments/${Voice}/test_synthesis/_synth_speech.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "conf/test_synth_${Voice}.conf"
echo "deleting intermediate synthesis files..."
./scripts/remove_intermediate_files.sh $global_config_file
diff --git a/egs/slt_arctic/s1/run_demo.sh b/egs/slt_arctic/s1/run_demo.sh
index da2f2dab..a9fb2ff0 100755
--- a/egs/slt_arctic/s1/run_demo.sh
+++ b/egs/slt_arctic/s1/run_demo.sh
@@ -1,10 +1,13 @@
#!/bin/bash -e
+source cmd.sh
+
if test "$#" -ne 0; then
echo "Usage: ./run_demo.sh"
exit 1
fi
+
### Step 1: setup directories and the training data files ###
./01_setup.sh slt_arctic_demo
diff --git a/egs/slt_arctic/s1/run_full_voice.sh b/egs/slt_arctic/s1/run_full_voice.sh
index 732044bf..cd19e0c5 100755
--- a/egs/slt_arctic/s1/run_full_voice.sh
+++ b/egs/slt_arctic/s1/run_full_voice.sh
@@ -1,5 +1,7 @@
#!/bin/bash -e
+source cmd.sh
+
if test "$#" -ne 0; then
echo "Usage: ./run_full_voice.sh"
exit 1
diff --git a/egs/slt_arctic/s1/scripts/queue.pl b/egs/slt_arctic/s1/scripts/queue.pl
new file mode 100755
index 00000000..eb9ef3de
--- /dev/null
+++ b/egs/slt_arctic/s1/scripts/queue.pl
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
+# Apache 2.0.
+use File::Basename;
+use Cwd;
+
+# queue.pl has the same functionality as run.pl, except that
+# it runs the job in question on the queue (Sun GridEngine).
+# This version of queue.pl uses the task array functionality
+# of the grid engine. Note: it's different from the queue.pl
+# in the s4 and earlier scripts.
+
+ $qsub_opts = ""; # options handed through to qsub
+ $sync = 0;       # set when "-sync y" is among them
+
+ for ($x = 1; $x <= 3; $x++) { # Several passes, so that the JOB=1:n
+     # option may be interleaved with the options to qsub.
+     # "-V" takes no value, "-pe" takes two, any other switch takes one.
+     while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
+         $switch = shift @ARGV;
+         if ($switch eq "-V") {
+             $qsub_opts .= "-V ";
+         } else {
+             $option = shift @ARGV;
+             if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+                 $sync = 1;
+             }
+             $qsub_opts .= "$switch $option ";
+             if ($switch eq "-pe") { # e.g. -pe smp 5
+                 $option2 = shift @ARGV;
+                 $qsub_opts .= "$option2 ";
+             }
+         }
+     }
+     if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) {
+         $jobname = $1;
+         $jobstart = $2;
+         $jobend = $3;
+         if ($jobstart > $jobend) { # checked before the shift so the message names the bad argument
+             die "queue.pl: invalid job range $ARGV[0]";
+         }
+         shift;
+     } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+         $jobname = $1;
+         $jobstart = $2;
+         $jobend = $2;
+         shift;
+     } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+         print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n";
+     }
+ }
+
+
+ if (@ARGV < 2) { # a log file and at least one command word are required
+ print STDERR
+ "Usage: queue.pl [options to qsub] [JOB=1:n] log-file command-line arguments...\n" .
+ "e.g.: queue.pl foo.log echo baz\n" .
+ " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" .
+ "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
+ " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" .
+ "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
+ " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" .
+ " another string other than JOB)\n" .
+ "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" .
+ "and change its behavior. Otherwise it uses qstat to work out when the job finished\n";
+ exit 1;
+ }
+
+ $cwd = getcwd();
+ $logfile = shift @ARGV; # first remaining argument is the log file; the rest is the command
+
+ if (defined $jobname && $logfile !~ m/$jobname/
+ && $jobend > $jobstart) { # several jobs would otherwise share one log file
+ print STDERR "queue.pl: you are trying to run a parallel job but "
+ . "you are putting the output into just one log file ($logfile)\n";
+ exit(1);
+ }
+
+ #
+ # Work out the command; quote escaping is done here.
+ # Note: the rules for escaping stuff are worked out pretty
+ # arbitrarily, based on what we want it to do. Some things that
+ # we pass as arguments to queue.pl, such as "|", we want to be
+ # interpreted by bash, so we don't escape them. Other things,
+ # such as archive specifiers like 'ark:gunzip -c foo.gz|', we want
+ # to be passed, in quotes, to the Kaldi program. Our heuristic
+ # is that stuff with spaces in should be quoted. This doesn't
+ # always work.
+ #
+ $cmd = "";
+
+ foreach $x (@ARGV) {
+ if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # If string contains no spaces, take
+ # as-is.
+ elsif ($x =~ m:\":) { $cmd .= "'\''$x'\'' "; } # else if it contains dbl-quotes, use single
+ else { $cmd .= "\"$x\" "; } # else use double.
+ }
+
+#
+# Work out the location of the script file, and open it for writing.
+#
+$dir = dirname($logfile);
+$base = basename($logfile);
+$qdir = "$dir/q";
+$qdir =~ s:/(log|LOG)/*q:/q:; # If qdir ends in .../log/q, make it just .../q.
+$queue_logfile = "$qdir/$base";
+
+if (!-d $dir) { system "mkdir $dir 2>/dev/null"; } # another job may be doing this...
+if (!-d $dir) { die "Cannot make the directory $dir\n"; }
+# make a directory called "q",
+# where we will put the log created by qsub... normally this doesn't contain
+# anything interesting, evertyhing goes to $logfile.
+if (! -d "$qdir") {
+ system "mkdir $qdir 2>/dev/null";
+ sleep(5); ## This is to fix an issue we encountered in denominator lattice creation,
+ ## where if e.g. the exp/tri2b_denlats/log/15/q directory had just been
+ ## created and the job immediately ran, it would die with an error because nfs
+ ## had not yet synced. I'm also decreasing the acdirmin and acdirmax in our
+ ## NFS settings to something like 5 seconds.
+}
+
+if (defined $jobname) { # It's an array job.
+ $queue_array_opt = "-t $jobstart:$jobend";
+ $logfile =~ s/$jobname/\$SGE_TASK_ID/g; # This variable will get
+ # replaced by qsub, in each job, with the job-id.
+ $cmd =~ s/$jobname/\$SGE_TASK_ID/g; # same for the command...
+ $queue_logfile =~ s/\.?$jobname//; # the log file in the q/ subdirectory
+ # is for the queue to put its log, and this doesn't need the task array subscript
+ # so we remove it.
+}
+
+# queue_scriptfile is as $queue_logfile [e.g. dir/q/foo.log] but
+# with the suffix .sh.
+$queue_scriptfile = $queue_logfile;
+($queue_scriptfile =~ s/\.[a-zA-Z]{1,5}$/.sh/) || ($queue_scriptfile .= ".sh");
+if ($queue_scriptfile !~ m:^/:) {
+ $queue_scriptfile = $cwd . "/" . $queue_scriptfile; # just in case.
+}
+
+ # The job is written to a script file (the file-handle Q) that is then handed to
+ # qsub. The current PATH is passed on with "-v PATH", just in case there was
+ # something in it that we need (although the script also sources ./path.sh).
+
+ $syncfile = "$qdir/done.$$";
+
+ system("rm $queue_logfile $syncfile 2>/dev/null");
+ # Built before the script is written so that the script can record it.
+ $qsub_cmd = "qsub -S /bin/bash -v PATH -cwd -j y -o $queue_logfile $qsub_opts $queue_array_opt $queue_scriptfile >>$queue_logfile 2>&1";
+ # Write to the script file, and then close it.
+ open(Q, ">$queue_scriptfile") || die "Failed to write to $queue_scriptfile";
+
+ print Q "#!/bin/bash\n";
+ print Q "cd $cwd\n";
+ print Q ". ./path.sh\n";
+ print Q "( echo '#' Running on \`hostname\`\n";
+ print Q "  echo '#' Started at \`date\`\n";
+ print Q "  echo -n '# '; cat <<EOF\n";
+ print Q "$cmd\n"; # this is a way of echoing the command into a comment in the log file,
+ print Q "EOF\n"; # without having to escape things like "|" and quote characters.
+ print Q ") >$logfile\n"; # the header starts the log afresh; the job output is appended below.
+ print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
+ print Q "ret=\$?\n";
+ print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
+ print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;
+   # let the script return with status 100 which will put it to E state; more easily rerunnable.
+ if (!defined $jobname) { # not an array job
+     print Q "touch $syncfile\n"; # so we know it's done.
+ } else {
+     print Q "touch $syncfile.\$SGE_TASK_ID\n"; # touch a bunch of sync-files.
+ }
+ print Q "exit \$[\$ret ? 1 : 0]\n"; # avoid status 100 which grid-engine
+ print Q "## submitted with:\n";       # treats specially.
+ print Q "# $qsub_cmd\n";
+ if (!close(Q)) { # close was not successful... || die "Could not close script file $shfile";
+     die "Failed to close the script file (full disk?)";
+ }
+
+ $ret = system ($qsub_cmd);
+ if ($ret != 0) {
+     if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
+         if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+         print STDERR "queue.pl: job writing to $logfile failed\n";
+     } else {
+         print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+         print STDERR `tail $queue_logfile`;
+     }
+     exit(1);
+ }
+
+ if (! $sync) { # We're not submitting with -sync y, so we
+     # need to wait for the jobs to finish. We wait for the
+     # sync-files we "touched" in the script to exist.
+     @syncfiles = ();
+     if (!defined $jobname) { # not an array job.
+         push @syncfiles, $syncfile;
+     } else {
+         for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+             push @syncfiles, "$syncfile.$jobid";
+         }
+     }
+     # We will need the sge_job_id, to check that job still exists
+     $sge_job_id=`grep "Your job" $queue_logfile | awk '{ print \$3 }' | sed 's|\\\..*||'`;
+     chomp($sge_job_id);
+     $check_sge_job_ctr=1;
+     # Poll with a growing delay; select() is used because sleep() only takes whole seconds.
+     $wait = 0.1;
+     foreach $f (@syncfiles) {
+         # wait for them to finish one by one.
+         while (! -f $f) {
+             select(undef, undef, undef, $wait);
+             $wait *= 1.2;
+             if ($wait > 3.0) {
+                 $wait = 3.0; # never wait more than 3 seconds.
+                 if (rand() > 0.5) {
+                     system("touch $qdir/.kick");
+                 } else {
+                     system("rm $qdir/.kick 2>/dev/null");
+                 }
+                 # This seems to kick NFS in the teeth to cause it to refresh the
+                 # directory. I've seen cases where it would indefinitely fail to get
+                 # updated, even though the file exists on the server.
+                 system("ls $qdir >/dev/null");
+             }
+
+             # Check that the job exists in SGE. Job can be killed if duration
+             # exceeds some hard limit, or in case of a machine shutdown.
+             if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
+                 if ( -f $f ) { next; }; #syncfile appeared, ok
+                 $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
+                 if($ret != 0) {
+                     # Don't consider immediately missing job as error, first wait some
+                     # time to make sure it is not just delayed creation of the syncfile.
+                     sleep(3);
+                     if ( -f $f ) { next; }; #syncfile appeared, ok
+                     sleep(7);
+                     if ( -f $f ) { next; }; #syncfile appeared, ok
+                     sleep(20);
+                     if ( -f $f ) { next; }; #syncfile appeared, ok
+                     #Otherwise it is an error
+                     if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+                     print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
+                     print STDERR " Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
+                     exit(1);
+                 }
+             }
+         }
+     }
+     $all_syncfiles = join(" ", @syncfiles);
+     system("rm $all_syncfiles 2>/dev/null");
+ }
+
+ # OK, at this point we are synced; we know the job is done.
+ # But we don't know about its exit status. We'll look at $logfile for this.
+ # First work out an array @logfiles of file-locations we need to
+ # read (just one, unless it's an array job).
+ @logfiles = ();
+ if (!defined $jobname) { # not an array job.
+     push @logfiles, $logfile;
+ } else {
+     for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+         $l = $logfile;
+         $l =~ s/\$SGE_TASK_ID/$jobid/g;
+         push @logfiles, $l;
+     }
+ }
+
+ $num_failed = 0;
+ foreach $l (@logfiles) {
+     @wait_times = (0.1, 0.2, 0.2, 0.3, 0.5, 0.5, 1.0, 2.0, 5.0, 5.0, 5.0, 10.0, 25.0);
+     for ($iter = 0; $iter <= @wait_times; $iter++) {
+         $line = `tail -10 $l 2>/dev/null`; # Note: although this line should be the last
+         # line of the file, I've seen cases where it was not quite the last line because
+         # of delayed output by the process that was running, or processes it had called.
+         # so tail -10 gives it a little leeway.
+         if ($line =~ m/with status (\d+)/) {
+             $status = $1;
+             last;
+         } else {
+             if ($iter < @wait_times) {
+                 select(undef, undef, undef, $wait_times[$iter]); # sleep() only takes whole seconds
+             } else {
+                 if (! -f $l) {
+                     print STDERR "Log-file $l does not exist.\n";
+                 } else {
+                     print STDERR "The last line of log-file $l does not seem to indicate the "
+                         . "return status as expected\n";
+                 }
+                 exit(1); # Something went wrong with the queue, or the
+                 # machine it was running on, probably.
+             }
+         }
+     }
+     # OK, now we have $status, which is the return-status of
+     # the command in the job.
+     if ($status != 0) { $num_failed++; }
+ }
+ if ($num_failed == 0) { exit(0); }
+ else { # we failed.
+     if (@logfiles == 1) {
+         if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/$jobstart/g; }
+         print STDERR "queue.pl: job failed with status $status, log is in $logfile\n";
+         if ($logfile =~ m/JOB/) {
+             print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+         }
+     } else {
+         if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+         $numjobs = 1 + $jobend - $jobstart;
+         print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n";
+     }
+     exit(1);
+ }
diff --git a/egs/slt_arctic/s1/scripts/run.pl b/egs/slt_arctic/s1/scripts/run.pl
new file mode 100755
index 00000000..1750bc50
--- /dev/null
+++ b/egs/slt_arctic/s1/scripts/run.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+
+# In general, doing
+# run.pl some.log a b c is like running the command a b c in
+# the bash shell, and putting the standard error and output into some.log.
+# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
+# run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
+# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
+# If any of the jobs fails, this script will fail.
+
+# A typical example is:
+# run.pl some.log my-prog "--opt=foo bar" foo \| other-prog baz
+# and run.pl will run something like:
+# ( my-prog '--opt=foo bar' foo | other-prog baz ) >& some.log
+#
+# Basically it takes the command-line arguments, quotes them
+# as necessary to preserve spaces, and evaluates them with bash.
+# In addition it puts the command line at the top of the log, and
+# the start and end times of the command at the beginning and end.
+# The reason why this is useful is so that we can create a different
+# version of this program that uses a queueing system instead.
+
+ @ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
+
+ $jobstart=1;
+ $jobend=1;
+ $qsub_opts=""; # These will be ignored.
+
+ # First parse an option like JOB=1:4, and any
+ # options that would normally be given to
+ # queue.pl, which we will just discard.
+
+ if (@ARGV > 0) {
+     while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { # parse any options
+         # that would normally go to qsub, but which will be ignored here.
+         $switch = shift @ARGV;
+         if ($switch eq "-V") {
+             $qsub_opts .= "-V ";
+         } else {
+             $option = shift @ARGV;
+             if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+                 $qsub_opts .= "-sync "; # Note: in the
+                 # corresponding code in queue.pl it says instead, just "$sync = 1;".
+             }
+             $qsub_opts .= "$switch $option ";
+             if ($switch eq "-pe") { # e.g. -pe smp 5
+                 $option2 = shift @ARGV;
+                 $qsub_opts .= "$option2 ";
+             }
+         }
+     }
+     if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) {
+         $jobname = $1;
+         $jobstart = $2;
+         $jobend = $3;
+         if ($jobstart > $jobend) { # checked before the shift so the message names the bad argument
+             die "run.pl: invalid job range $ARGV[0]";
+         }
+         shift;
+     } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+         $jobname = $1;
+         $jobstart = $2;
+         $jobend = $2;
+         shift;
+     } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+         print STDERR "Warning: suspicious first argument to run.pl: $ARGV[0]\n";
+     }
+ }
+
+ if ($qsub_opts ne "") {
+     print STDERR "Warning: run.pl ignoring options \"$qsub_opts\"\n";
+ }
+
+ $logfile = shift @ARGV;
+ # A range of jobs needs the job name in the log name, else all jobs would share one log.
+ if (defined $jobname && $jobend > $jobstart && $logfile !~ m/$jobname/) {
+     print STDERR "run.pl: you are trying to run a parallel job but "
+         . "you are putting the output into just one log file ($logfile)\n";
+     exit(1);
+ }
+
+ # Re-quote the arguments for bash: words without blanks as they are, anything holding
+ # a double quote in single quotes, and the rest in double quotes.
+ $cmd = "";
+ foreach $arg (@ARGV) {
+     $quoted = ($arg =~ m/^\S+$/) ? $arg
+             : ($arg =~ m:\":)    ? "'$arg'"
+             :                      "\"$arg\"";
+     $cmd .= $quoted . " ";
+ }
+
+
+ for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { # one forked child per job index
+ $childpid = fork();
+ if (!defined $childpid) { die "Error forking in run.pl (writing to $logfile)"; }
+ if ($childpid == 0) { # We're in the child... this branch
+ # executes the job and returns (possibly with an error status).
+ if (defined $jobname) {
+ $cmd =~ s/$jobname/$jobid/g; # every occurrence of the job name becomes this job's number
+ $logfile =~ s/$jobname/$jobid/g;
+ }
+ system("mkdir -p `dirname $logfile` 2>/dev/null");
+ open(F, ">$logfile") || die "Error opening log file $logfile"; # log header: the command and its start time
+ print F "# " . $cmd . "\n";
+ print F "# Started at " . `date`;
+ $starttime = `date +'%s'`;
+ print F "#\n";
+ close(F);
+
+ # Pipe into bash.. make sure we're not using any other shell.
+ open(B, "|bash") || die "Error opening shell command";
+ print B "( " . $cmd . ") 2>>$logfile >> $logfile"; # stdout and stderr of the job are appended to the log
+ close(B); # If there was an error, exit status is in $?
+ $ret = $?;
+
+ $endtime = `date +'%s'`;
+ open(F, ">>$logfile") || die "Error opening log file $logfile (again)";
+ $enddate = `date`;
+ chop $enddate;
+ print F "# Ended (code $ret) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
+ close(F);
+ exit($ret == 0 ? 0 : 1); # any failure status is reported to the parent as 1
+ }
+ }
+
+ # Reap one child per job; any non-zero child status marks the whole run as failed.
+ $ret = 0;
+ $numfail = 0;
+ for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+     $r = wait();
+     if ($r == -1) { die "Error waiting for child process"; } # should never happen.
+     if ($? != 0) { $numfail++; $ret = 1; } # The child process failed.
+ }
+
+ # Tell the user where to look; the hint covers a literal "JOB" left in the log name.
+ if ($ret != 0) {
+     $njobs = $jobend - $jobstart + 1;
+     if ($njobs == 1) {
+         print STDERR "run.pl: job failed, log is in $logfile\n";
+         if ($logfile =~ m/JOB/) {
+             print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+         }
+     } else {
+         $logfile =~ s/$jobname/*/g; # one pattern that covers the logs of all jobs
+         print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
+     }
+ }
+
+ exit ($ret);