From b136d913cc51f5bd0d899954c78170abbe48a70f Mon Sep 17 00:00:00 2001 From: rob-maron <132852777+rob-maron@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:13:15 -0400 Subject: [PATCH 1/2] trim whitespace + control chars --- shared/data-provider/src/hub.rs | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/shared/data-provider/src/hub.rs b/shared/data-provider/src/hub.rs index 13a575b84..7206fb58e 100644 --- a/shared/data-provider/src/hub.rs +++ b/shared/data-provider/src/hub.rs @@ -1,11 +1,11 @@ use crate::errors::UploadError; use crate::hub::model::HubRepo; use hf_hub::{ - Cache, Repo, RepoType, api::{ - Siblings, tokio::{ApiError, UploadSource}, + Siblings, }, + Cache, Repo, RepoType, }; use psyche_coordinator::model; use psyche_core::FixedString; @@ -16,6 +16,13 @@ use tracing::{error, info}; const MODEL_EXTENSIONS: [&str; 3] = [".safetensors", ".json", ".py"]; const DATASET_EXTENSIONS: [&str; 1] = [".parquet"]; +/// Strip leading/trailing whitespace and control characters from a repo identifier. +/// TODO: Remove once https://github.com/PsycheFoundation/nousnet/pull/636 is merged +fn sanitize_repo_id(raw: &str) -> String { + raw.trim_matches(|c: char| c.is_whitespace() || c.is_control()) + .to_string() +} + fn check_extensions(sibling: &Siblings, extensions: &[&'static str]) -> bool { match extensions.is_empty() { true => true, @@ -90,10 +97,11 @@ pub async fn download_model_repo_async( max_concurrent_downloads: Option, progress_bar: bool, ) -> Result, ApiError> { + let repo_id = sanitize_repo_id(repo_id); download_repo_async( match revision { - Some(revision) => Repo::with_revision(repo_id.to_string(), RepoType::Model, revision), - None => Repo::model(repo_id.to_string()), + Some(revision) => Repo::with_revision(repo_id.clone(), RepoType::Model, revision), + None => Repo::model(repo_id), }, cache, token, @@ -112,10 +120,11 @@ pub async fn download_dataset_repo_async( max_concurrent_downloads: Option, progress_bar: bool, ) -> Result, ApiError> { + let repo_id = sanitize_repo_id(&repo_id); download_repo_async( match revision { - Some(revision) => Repo::with_revision(repo_id.to_owned(), RepoType::Dataset, revision), - None => Repo::new(repo_id.to_owned(), RepoType::Dataset), + Some(revision) => Repo::with_revision(repo_id.clone(), RepoType::Dataset, revision), + None => Repo::new(repo_id, RepoType::Dataset), }, cache, token, @@ -162,10 +171,11 @@ pub fn download_model_repo_sync( token: Option, progress_bar: bool, ) -> Result, hf_hub::api::sync::ApiError> { + let repo_id = sanitize_repo_id(repo_id); download_repo_sync( match revision { - Some(revision) => Repo::with_revision(repo_id.to_owned(), RepoType::Model, revision), - None => Repo::model(repo_id.to_owned()), + Some(revision) => Repo::with_revision(repo_id.clone(), RepoType::Model, revision), + None => Repo::model(repo_id), }, cache, token, @@ -181,10 +191,11 @@ pub fn download_dataset_repo_sync( token: Option, progress_bar: bool, ) -> Result, hf_hub::api::sync::ApiError> { + let repo_id = sanitize_repo_id(repo_id); download_repo_sync( match revision { - Some(revision) => Repo::with_revision(repo_id.to_owned(), RepoType::Dataset, revision), - None => Repo::new(repo_id.to_owned(), RepoType::Dataset), + Some(revision) => Repo::with_revision(repo_id.clone(), RepoType::Dataset, revision), + None => Repo::new(repo_id, RepoType::Dataset), }, cache, token, From 7fc91ce2a6c0d27214a18abda2b020d9c79fceaf Mon Sep 17 00:00:00 2001 From: rob-maron <132852777+rob-maron@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:15:56 -0400 Subject: [PATCH 2/2] fmt --- shared/data-provider/src/hub.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/data-provider/src/hub.rs b/shared/data-provider/src/hub.rs index 7206fb58e..5b44a01d3 100644 --- a/shared/data-provider/src/hub.rs +++ b/shared/data-provider/src/hub.rs @@ -1,11 +1,11 @@ use crate::errors::UploadError; use crate::hub::model::HubRepo; use hf_hub::{ + Cache, Repo, RepoType, api::{ - tokio::{ApiError, UploadSource}, Siblings, + tokio::{ApiError, UploadSource}, }, - Cache, Repo, RepoType, }; use psyche_coordinator::model; use psyche_core::FixedString;