Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docs/supported_metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ We include x mark if the metric is auto-installed in versa.
| 6 | x | PESQ in TorchAudio-Squim | squim_no_ref | torch_squim_pesq | [torch_squim](https://pytorch.org/audio/main/tutorials/squim_tutorial.html) | [paper](https://arxiv.org/abs/2304.01448) |
| 7 | x | STOI in TorchAudio-Squim | squim_no_ref | torch_squim_stoi | [torch_squim](https://pytorch.org/audio/main/tutorials/squim_tutorial.html) | [paper](https://arxiv.org/abs/2304.01448) |
| 8 | x | SI-SDR in TorchAudio-Squim | squim_no_ref | torch_squim_si_sdr | [torch_squim](https://pytorch.org/audio/main/tutorials/squim_tutorial.html) | [paper](https://arxiv.org/abs/2304.01448) |
| 9 | x | Singing voice MOS | singmos | singmos |[singmos](https://github.com/South-Twilight/SingMOS/tree/main) | [paper](https://arxiv.org/abs/2406.10911) |
| 9 | x | Singing voice MOS | singmos_v2 | singmos_v2 |[singmos](https://github.com/South-Twilight/SingMOS/tree/main) | [paper](https://arxiv.org/abs/2406.10911) |
| 9 | x | Singing voice MOS (SingMOS v1) | pseudo_mos | singmos_v1 |[singmos](https://github.com/South-Twilight/SingMOS) | [paper](https://arxiv.org/abs/2406.10911) |
| 10 | x | Sheet SSQA MOS Models | sheet_ssqa | sheet_ssqa |[Sheet](https://github.com/unilight/sheet/tree/main) | [paper](https://arxiv.org/abs/2411.03715) |
| 11 | | UTMOSv2: UTokyo-SaruLab MOS Prediction System | utmosv2 | utmosv2 |[UTMOSv2](https://github.com/sarulab-speech/UTMOSv2) | [paper](https://arxiv.org/abs/2409.09305) |
| 12 | | Speech Contrastive Regression for Quality Assessment without reference (ScoreQ) | scoreq_nr | scoreq_nr |[ScoreQ](https://github.com/ftshijt/scoreq/tree/main) | [paper](https://arxiv.org/pdf/2410.06675) |
Expand Down Expand Up @@ -60,7 +59,7 @@ We include x mark if the metric is auto-installed in versa.
| 52 | | WV-MOS (MOS score prediction by fine-tuned wav2vec2.0 model) | wvmos | wvmos | [wvmos](https://github.com/AndreevP/wvmos) | [paper](https://arxiv.org/abs/2203.13086) |
| 53 | | SIG-MOS | sigmos | {SIGMOS_COL, SIGMOS_DISC, SIGMOS_LOUD, SIGMOS_REVERB, SIGMOS_SIG, SIGMOS_OVRL} | [sigmos](https://github.com/microsoft/SIG-Challenge/tree/main/ICASSP2024/sigmos) | [paper](https://arxiv.org/pdf/2309.07385) |
| 54 | x | VQScore (Self-Supervised Speech Quality Estimation and Enhancement Using Only Clean Speech) | vqscore | vqscore | [VQScore](https://github.com/JasonSWFu/VQscore) | [paper](https://arxiv.org/abs/2402.16321) |

| 55 | x | Singing voice MOS (SingMOS-Pro) | pseudo_mos | singmos_pro |[singmos](https://github.com/South-Twilight/SingMOS) | [paper](https://arxiv.org/abs/2510.01812) |

### Dependent Metrics
|Number| Auto-Install | Metric Name (Auto-Install) | Key in config | Key in report | Code Source | References |
Expand Down
4 changes: 2 additions & 2 deletions egs/separate_metrics/pseudo_mos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
fs: 16000
plcmos:
fs: 16000
singmos:
singmos_v1:
fs: 16000
singmos_v2:
singmos_pro:
fs: 16000
dnsmos_pro_bvcc:
fs: 16000
Expand Down
2 changes: 1 addition & 1 deletion egs/singing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
cache_dir: versa_cache/audiobox

- name: pseudo_mos
predictor_types: ["singmos", "singmos_v2"]
predictor_types: ["singmos_pro", "singmos_v1"]

# An overall model on MOS-bench from Sheet toolkit
# More info in https://github.com/unilight/sheet/tree/main
Expand Down
40 changes: 20 additions & 20 deletions versa/utterance_metrics/pseudo_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,20 +89,20 @@ def pseudo_mos_setup(
predictor_fs["plcmos"] = predictor_args["plcmos"]["fs"]
elif predictor == "utmos" or predictor == "utmosv2":
continue # already initialized
elif predictor == "singmos":
elif predictor == "singmos_v1":
torch.hub.set_dir(cache_dir)
singmos = torch.hub.load(
"South-Twilight/SingMOS:v0.2.0", "singing_ssl_mos", trust_repo=True
"South-Twilight/SingMOS:v1.1.1", "singmos_v1", trust_repo=True
).to(device)
predictor_dict["singmos"] = singmos
predictor_fs["singmos"] = 16000
elif predictor == "singmos_v2":
predictor_dict["singmos_v1"] = singmos
predictor_fs["singmos_v1"] = 16000
elif predictor == "singmos_pro":
torch.hub.set_dir(cache_dir)
singmos = torch.hub.load(
"South-Twilight/SingMOS:v0.3.0", "singing_ssl_mos_v2", trust_repo=True
"South-Twilight/SingMOS:v1.1.1", "singmos_pro", trust_repo=True
).to(device)
predictor_dict["singmos_v2"] = singmos
predictor_fs["singmos_v2"] = 16000
predictor_dict["singmos_pro"] = singmos
predictor_fs["singmos_pro"] = 16000
elif predictor.startswith("dnsmos_pro_"):
variant = predictor[len("dnsmos_pro_") :]
model_path = Path(cache_dir) / f"dnsmos_pro_{variant}.pt"
Expand Down Expand Up @@ -209,10 +209,10 @@ def pseudo_mos_metric(pred, fs, predictor_dict, predictor_fs, use_gpu=False):
max_val = np.max(np.abs(pred_plcmos))
score = predictor_dict["plcmos"].run(pred_plcmos / max_val, sr=fs)
scores.update(plcmos=score["plcmos"])
elif predictor == "singmos":
if fs != predictor_fs["singmos"]:
elif predictor == "singmos_v1":
if fs != predictor_fs["singmos_v1"]:
pred_singmos = librosa.resample(
pred, orig_sr=fs, target_sr=predictor_fs["singmos"]
pred, orig_sr=fs, target_sr=predictor_fs["singmos_v1"]
)
else:
pred_singmos = pred
Expand All @@ -221,14 +221,14 @@ def pseudo_mos_metric(pred, fs, predictor_dict, predictor_fs, use_gpu=False):
if use_gpu:
pred_tensor = pred_tensor.to("cuda")
length_tensor = length_tensor.to("cuda")
score = predictor_dict["singmos"](pred_tensor.float(), length_tensor)[
score = predictor_dict["singmos_v1"](pred_tensor.float(), length_tensor)[
0
].item()
scores.update(singmos=score)
elif predictor == "singmos_v2":
if fs != predictor_fs["singmos_v2"]:
scores.update(singmos_v1=score)
elif predictor == "singmos_pro":
if fs != predictor_fs["singmos_pro"]:
pred_singmos = librosa.resample(
pred, orig_sr=fs, target_sr=predictor_fs["singmos_v2"]
pred, orig_sr=fs, target_sr=predictor_fs["singmos_pro"]
)
else:
pred_singmos = pred
Expand All @@ -237,10 +237,10 @@ def pseudo_mos_metric(pred, fs, predictor_dict, predictor_fs, use_gpu=False):
if use_gpu:
pred_tensor = pred_tensor.to("cuda")
length_tensor = length_tensor.to("cuda")
score = predictor_dict["singmos_v2"](pred_tensor.float(), length_tensor)[
score = predictor_dict["singmos_pro"](pred_tensor.float(), length_tensor)[
0
].item()
scores.update(singmos_v2=score)
scores.update(singmos_pro=score)
elif predictor.startswith("dnsmos_pro_"):
if fs != predictor_fs[predictor]:
pred_dnsmos_pro = librosa.resample(
Expand Down Expand Up @@ -305,8 +305,8 @@ def stft(
"utmos",
"dnsmos",
"plcmos",
"singmos",
"singmos_v2",
"singmos_v1",
"singmos_pro",
"dnsmos_pro_bvcc",
"dnsmos_pro_nisqa",
"dnsmos_pro_vcc2018",
Expand Down
Loading