From b3b7520127ec49b68faf53f31926ef0623518249 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Fri, 13 Feb 2026 13:46:41 -0800 Subject: [PATCH 01/27] checkpoint: manifest renames --- Cargo.lock | 1 + crates/weaver_resolved_schema/src/lib.rs | 4 +- .../published/registry_manifest.yaml | 8 +- crates/weaver_resolver/src/loader.rs | 7 +- crates/weaver_semconv/Cargo.toml | 1 + crates/weaver_semconv/src/manifest.rs | 128 +++++++++++++----- crates/weaver_semconv/src/registry.rs | 10 +- crates/weaver_semconv/src/registry_repo.rs | 50 +++++-- .../3.0.0/registry_manifest.yaml | 2 +- .../tests/published_repository/resolved/1.0.0 | 2 +- .../tests/published_repository/resolved/2.0.0 | 2 +- 11 files changed, 151 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b7094cba..11ff91ae9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5894,6 +5894,7 @@ dependencies = [ "serde_yaml", "thiserror 2.0.18", "ureq", + "url", "utoipa", "walkdir", "weaver_common", diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index cde4f4947..bc7e371a4 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -298,13 +298,13 @@ impl ResolvedTelemetrySchema { if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().clone(), }); } if let Some(ref manifest) = baseline_schema.registry_manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().clone(), }); } diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml index f85d5ef60..6d7cb03d2 100644 --- 
a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml @@ -1,7 +1,5 @@ file_format: manifest/2.0.0 -name: resolved -description: Test repository that has been resolved. -version: 1.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +schema_url: https://opentelemetry.io/schemas/1.0.0 +resolved_schema_uri: resolved_schema.yaml +description: Test repository that has been resolved. \ No newline at end of file diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index b00634e54..7d1e8af29 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -14,7 +14,7 @@ use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; use weaver_semconv::json_schema::JsonSchemaValidator; -use weaver_semconv::registry_repo::{RegistryRepo, REGISTRY_MANIFEST}; +use weaver_semconv::registry_repo::{LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST, RegistryRepo}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; use crate::Error; @@ -192,7 +192,7 @@ fn load_semconv_repository_recursive( // Either load a fully resolved repository, or read in raw files. 
if let Some(manifest) = registry_repo.manifest() { - if let Some(resolved_url) = registry_repo.resolved_schema_url() { + if let Some(resolved_url) = registry_repo.resolved_schema_uri() { load_resolved_repository(&resolved_url) } else { if manifest.dependencies.len() > 1 { @@ -281,6 +281,7 @@ fn load_definition_repository( && (extension == "yaml" || extension == "yml") && file_name != "schema-next.yaml" && file_name != REGISTRY_MANIFEST + && file_name != LEGACY_REGISTRY_MANIFEST } let local_path = registry_repo.path().to_path_buf(); let registry_path_repr = registry_repo.registry_path_repr(); @@ -481,7 +482,7 @@ mod tests { WResult::FatalErr(fatal) => { let error_msg = fatal.to_string(); assert!( - error_msg.contains("Circular dependency detected") && + error_msg.contains("Circular dependency detected") && error_msg.contains("registry_a") && error_msg.contains("registry_b"), "Expected circular dependency error mentioning both registries, got: {error_msg}" diff --git a/crates/weaver_semconv/Cargo.toml b/crates/weaver_semconv/Cargo.toml index 50cff5ad9..f4996ab66 100644 --- a/crates/weaver_semconv/Cargo.toml +++ b/crates/weaver_semconv/Cargo.toml @@ -29,6 +29,7 @@ regex.workspace = true globset.workspace = true itertools.workspace = true log.workspace = true +url.workspace = true glob = "0.3.3" jsonschema = "0.40.0" # JSON Schema validation used to enhance error messages diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 3619cd073..548d54cf9 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -25,12 +25,16 @@ use weaver_common::vdir::VirtualDirectoryPath; pub struct RegistryManifest { /// The file format for this registry. /// - /// No value is assumed to be `definition/1.0.0` + /// No value is assumed to be `manifest/2.0.0` #[serde(skip_serializing_if = "Option::is_none", default)] pub file_format: Option, - /// The name of the registry. 
This name is used to define the package name. - pub name: String, + /// The schema URL for this registry. + /// This URL is populated before registry is published and is used as + /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: + /// `http[s]://server[:port]/path/`. + /// See https://github.com/open-telemetry/opentelemetry-specification/blob/v1.53.0/specification/schemas/README.md#schema-url for more details. + pub schema_url: Option, /// An optional description of the registry. /// @@ -41,12 +45,18 @@ pub struct RegistryManifest { pub description: Option, /// The version of the registry which will be used to define the semconv package version. - #[serde(alias = "semconv_version")] - pub version: String, + #[serde(skip_serializing_if = "Option::is_none", default)] + #[deprecated( + note = "The `version` field is deprecated. The registry version should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." + )] + pub semconv_version: Option, /// The base URL where the registry's schema files are hosted. - #[serde(alias = "schema_base_url")] - pub repository_url: String, + #[serde(skip_serializing_if = "Option::is_none", default)] + #[deprecated( + note = "The `schema_base_url` field is deprecated. The registry schema URL should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." + )] + pub schema_base_url: Option, /// List of the registry's dependencies. /// Note: In the current phase, we only support zero or one dependency. @@ -60,7 +70,7 @@ pub struct RegistryManifest { /// The location of the resolved telemetry schema, if available. #[serde(skip_serializing_if = "Option::is_none")] - pub resolved_schema_url: Option, + pub resolved_schema_uri: Option, } /// Represents a dependency of a semantic convention registry. 
@@ -94,7 +104,7 @@ impl RegistryManifest { error: e.to_string(), })?; let reader = std::io::BufReader::new(file); - let manifest: RegistryManifest = + let mut manifest: RegistryManifest = serde_yaml::from_reader(reader).map_err(|e| InvalidRegistryManifest { path: manifest_path_buf.clone(), error: e.to_string(), @@ -102,37 +112,94 @@ impl RegistryManifest { manifest.validate(manifest_path_buf.clone())?; + // If the schema URL is not provided, populate it using deprecated schema_base_url and semconv_version + // validation would fail if they were not provided + if manifest.schema_url.is_none() { + manifest.schema_url = Some(format!( + "{}/{}", + manifest.schema_base_url.clone().unwrap_or_default(), + manifest.semconv_version.clone().unwrap_or_default() + )); + } + Ok(manifest) } fn validate(&self, path: PathBuf) -> Result<(), Error> { let mut errors = vec![]; - if self.name.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry name is required.".to_owned(), - }); - } - - if self.version.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }); - } + let schema_url_empty = self.schema_url.as_ref().map_or(true, |url| url.is_empty()); + let schema_base_url_empty = self.schema_base_url.as_ref().map_or(true, |url| url.is_empty()); + let semconv_version_empty = self.semconv_version.as_ref().map_or(true, |v| v.is_empty()); - if self.repository_url.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }); + if schema_url_empty { + if schema_base_url_empty || semconv_version_empty { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry schema URL is required.".to_owned(), + }); + } else { + // schema_base_url should be a valid absolute URL, otherwise push an error to the list. 
+ if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema base URL: {}", e), + }); + } + } + } else { + // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment + match url::Url::parse(self.schema_url.as_ref().unwrap()) { + Ok(parsed_url) => { + if parsed_url.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry schema URL must have at least one path segment.".to_owned(), + }); + } + } + Err(e) => { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema URL: {}", e), + }); + } + } } handle_errors(errors)?; - Ok(()) } + + /// Returns the registry name, which is derived from the schema URL. + /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry name would be `opentelemetry.io/schemas/sub-component` + pub fn name(&self) -> String { + let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); + let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); + let authority = parsed_url.host_str().unwrap_or_default(); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + format!("{}/{}", authority, segments.join("/")) + } + + /// Returns the registry version, which is derived from the schema URL. 
+ /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry version would be `1.0.0` + pub fn version(&self) -> String { + let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); + let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } } #[cfg(test)] @@ -163,9 +230,8 @@ mod tests { let config = RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") .expect("Failed to load the registry configuration file."); - assert_eq!(config.name, "vendor_acme"); - assert_eq!(config.version, "0.1.0"); - assert_eq!(config.repository_url, "https://acme.com/schemas/"); + assert_eq!(config.name(), "vendor_acme"); + assert_eq!(config.version(), "0.1.0"); } #[test] diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 3ee47d960..7e33fa5b2 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -145,12 +145,12 @@ impl SemConvRegistry { registry.set_manifest(RegistryManifest { file_format: None, - name: registry_repo.id().as_ref().to_owned(), - description: None, - version: semconv_version, - repository_url: "".to_owned(), + schema_url: registry_repo.manifest().and_then(|m| Some(m.schema_url.clone())).unwrap_or_default(), + schema_base_url: registry_repo.manifest().and_then(|m| m.schema_base_url.clone()), + semconv_version: registry_repo.manifest().and_then(|m| m.semconv_version.clone()), + description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], - resolved_schema_url: None, + resolved_schema_uri: None, stability: crate::stability::Stability::Development, }); } else { diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 22c7c0eb0..c91f89d57 100644 --- 
a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -9,10 +9,16 @@ use std::sync::Arc; use crate::manifest::RegistryManifest; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; -use weaver_common::{get_path_type, log_info}; +use weaver_common::{get_path_type, log_info, log_warn}; + +/// The name of the legacy registry manifest file. +#[deprecated( + note = "The registry manifest file is renamed to `manifest.yaml`." +)] +pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// The name of the registry manifest file. -pub const REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; +pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; /// A semantic convention registry repository that can be: /// - A definition repository, which is one of: @@ -48,7 +54,7 @@ impl RegistryRepo { }; if let Some(manifest) = registry_repo.manifest_path() { let registry_manifest = RegistryManifest::try_from_file(manifest)?; - registry_repo.id = Arc::from(registry_manifest.name.as_str()); + registry_repo.id = Arc::from(registry_manifest.name().as_str()); registry_repo.manifest = Some(registry_manifest); } Ok(registry_repo) @@ -78,27 +84,27 @@ impl RegistryRepo { self.manifest.as_ref() } - /// Returns the resolved schema URL, if available in the manifest. + /// Returns the resolved schema URI, if available in the manifest. #[must_use] - pub fn resolved_schema_url(&self) -> Option { + pub fn resolved_schema_uri(&self) -> Option { let manifest = self.manifest.as_ref()?; - let resolved_url: &str = manifest.resolved_schema_url.as_ref()?; - match get_path_type(resolved_url) { + let resolved_uri: &str = manifest.resolved_schema_uri.as_ref()?; + match get_path_type(resolved_uri) { weaver_common::PathType::RelativePath => { - // We need to understand if the manifest URL is the same as the registry URL. + // We need to understand if the manifest URI is the same as the registry URI. 
let vdir_was_manifest_file = self.manifest_path()? == self.registry.path(); Some(self.registry.vdir_path().map_sub_folder(|path| { if vdir_was_manifest_file { match Path::new(&path).parent() { - Some(parent) => format!("{}/{resolved_url}", parent.display()), + Some(parent) => format!("{}/{resolved_uri}", parent.display()), None => "".to_owned(), } } else { - format!("{path}/{resolved_url}") + format!("{path}/{resolved_uri}") } })) } - _ => resolved_url.try_into().ok(), + _ => resolved_uri.try_into().ok(), } } @@ -111,12 +117,20 @@ impl RegistryRepo { return Some(self.registry.path().to_path_buf()); } let manifest_path = self.registry.path().join(REGISTRY_MANIFEST); + let legacy_path = self.registry.path().join(LEGACY_REGISTRY_MANIFEST); if manifest_path.exists() { log_info(format!( "Found registry manifest: {}", manifest_path.display() )); Some(manifest_path) + } else if legacy_path.exists() { + log_warn(format!( + "Found registry manifest: {}. Please rename file to {}, as the old name is deprecated and won't be supported in future versions.", + legacy_path.display(), + REGISTRY_MANIFEST + )); + Some(legacy_path) } else { log_info(format!( "No registry manifest found: {}", @@ -125,6 +139,12 @@ impl RegistryRepo { None } } + + /// Returns the registry schema URL, if available in the manifest. 
+ #[must_use] + pub fn schema_url(&self) -> Option<String> { + self.manifest.as_ref().and_then(|manifest| manifest.schema_url.clone()) + } } #[cfg(test)] @@ -171,9 +191,9 @@ mod tests { let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; - assert_eq!(manifest.name, "resolved"); + assert_eq!(manifest.name(), "resolved"); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() @@ -190,7 +210,7 @@ mod tests { }; let repo = RegistryRepo::try_new("main", &registry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() @@ -204,7 +224,7 @@ mod tests { }; let repo = RegistryRepo::try_new("main", &registry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index eb2ca0198..c4fc26958 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -4,4 +4,4 @@ description: Test repository that has been resolved.
version: 3.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index 1dc1d84e7..aa8518c7b 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -4,4 +4,4 @@ description: Test repository that has been resolved. version: 1.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_1.0.0.yaml +resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 681fa6400..91bbc3a6b 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -4,4 +4,4 @@ description: Test repository that has been resolved. 
version: 2.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 +resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 From 148bd04cbc5b24f5138167de2afed2c5926ad392 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Fri, 13 Feb 2026 18:11:40 -0800 Subject: [PATCH 02/27] another checkpoint --- crates/weaver_codegen_test/build.rs | 4 +- crates/weaver_emit/src/lib.rs | 3 +- crates/weaver_forge/src/lib.rs | 15 +- crates/weaver_forge/src/v2/registry.rs | 14 +- crates/weaver_live_check/src/live_checker.rs | 14 +- crates/weaver_mcp/src/service.rs | 3 +- crates/weaver_resolved_schema/src/lib.rs | 47 ++-- crates/weaver_resolved_schema/src/v2/mod.rs | 27 +-- .../weaver_resolved_schema/src/v2/registry.rs | 6 - .../registry_a/registry_manifest.yaml | 2 +- .../registry_b/registry_manifest.yaml | 4 +- .../app_registry/registry_manifest.yaml | 2 +- .../custom_registry/registry_manifest.yaml | 2 +- .../expected-registry.json | 4 +- .../published/resolved_schema.yaml | 4 +- .../registry/registry_manifest.yaml | 2 +- crates/weaver_resolver/src/attribute.rs | 2 +- crates/weaver_resolver/src/dependency.rs | 12 +- crates/weaver_resolver/src/error.rs | 19 +- crates/weaver_resolver/src/lib.rs | 39 ++-- crates/weaver_resolver/src/loader.rs | 69 +++--- crates/weaver_resolver/src/registry.rs | 10 +- crates/weaver_search/src/lib.rs | 3 +- crates/weaver_semconv/src/manifest.rs | 201 +++++++++++++----- crates/weaver_semconv/src/registry.rs | 18 +- crates/weaver_semconv/src/registry_repo.rs | 112 +++++++--- .../3.0.0/registry_manifest.yaml | 3 +- .../tests/published_repository/resolved/1.0.0 | 3 +- .../tests/published_repository/resolved/2.0.0 | 3 +- .../weaver_semconv_gen/data_v2/templates.md | 4 +- crates/weaver_semconv_gen/src/v1.rs | 
2 +- crates/weaver_semconv_gen/src/v2.rs | 5 +- src/registry/check.rs | 2 +- src/registry/diff.rs | 9 +- src/registry/resolve.rs | 3 +- src/serve/handlers.rs | 2 +- src/serve/types.rs | 4 +- src/weaver.rs | 3 +- tests/custom_registry/registry_manifest.yaml | 2 +- tests/registry_stats.rs | 6 +- tests/resolution_process.rs | 11 +- 41 files changed, 447 insertions(+), 253 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 7681d0deb..8183376e2 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = - RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), &registry_path) + .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index 1f75880ef..1c8477d0f 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -268,6 +268,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, Examples, PrimitiveOrArrayTypeSpec, RequirementLevel}, group::{GroupType, InstrumentSpec, SpanKindSpec}, + manifest::SchemaUrl, stability::Stability, }; @@ -590,7 +591,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - registry_url: "TEST_V2".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![], attribute_groups: vec![], diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 47b673ed6..f432ed2f2 100644 --- a/crates/weaver_forge/src/lib.rs +++
b/crates/weaver_forge/src/lib.rs @@ -856,11 +856,12 @@ mod tests { ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { let registry_id = "default"; + let registry_version = "1.0.0"; let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1060,8 +1061,9 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let registry_version = "1.0.0"; + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1190,8 +1192,9 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let registry_version = "1.0.0"; + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 7c9595756..cd219d683 100644 --- 
a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -3,6 +3,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog}; +use weaver_semconv::manifest::SchemaUrl; use crate::{ error::Error, @@ -24,8 +25,7 @@ use crate::{ #[serde(deny_unknown_fields)] pub struct ForgeResolvedRegistry { /// The semantic convention registry url. - #[serde(skip_serializing_if = "String::is_empty")] - pub registry_url: String, + pub schema_url: SchemaUrl, // TODO - Attribute Groups /// The signals defined in this registry. pub registry: Registry, @@ -413,7 +413,7 @@ impl ForgeResolvedRegistry { } Ok(Self { - registry_url: schema.schema_url.clone(), + schema_url: schema.schema_url.clone(), registry: Registry { attributes, attribute_groups, @@ -448,8 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("https://example.com/schema".to_owned()), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -457,7 +456,6 @@ mod tests { common: CommonFields::default(), }], registry: v2::registry::Registry { - registry_url: "https://example.com/registry".to_owned(), attributes: vec![attribute::AttributeRef(0)], spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), @@ -613,11 +611,9 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("https://example.com/schema".to_owned()), attribute_catalog: vec![], registry: v2::registry::Registry { - 
registry_url: "https://example.com/registry".to_owned(), attributes: vec![], // No attributes - This is the logic bug. spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 0ad3955f8..f73b47e00 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -185,7 +185,6 @@ mod tests { span::{Span as V2Span, SpanAttribute}, }; use weaver_resolved_schema::attribute::Attribute; - use weaver_semconv::v2::{span::SpanName, CommonFields}; use weaver_semconv::{ attribute::{ AttributeType, BasicRequirementLevelSpec, EnumEntriesSpec, Examples, @@ -195,6 +194,10 @@ mod tests { stability::Stability, YamlValue, }; + use weaver_semconv::{ + manifest::SchemaUrl, + v2::{span::SpanName, CommonFields}, + }; fn get_all_advice(sample: &mut Sample) -> &mut [PolicyFinding] { match sample { @@ -508,7 +511,7 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ V2Attribute { @@ -794,7 +797,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_METRICS".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -1002,8 +1005,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), - + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1517,7 +1519,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_EVENTS".to_owned(), + schema_url: 
SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 5f945310f..ea4fc2617 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -386,13 +386,14 @@ mod tests { use weaver_search::SearchType; use weaver_semconv::attribute::AttributeType; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index bc7e371a4..655ddbb21 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::GroupType; -use weaver_semconv::manifest::RegistryManifest; +use weaver_semconv::manifest::{RegistryManifest, SchemaUrl}; use weaver_version::schema_changes::{SchemaChanges, SchemaItemChange, SchemaItemType}; use weaver_version::Versions; @@ -51,10 +51,8 @@ pub(crate) const V2_RESOLVED_FILE_FORMAT: &str = "resolved/2.0.0"; pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, - /// Schema URL that this file is published at. - pub schema_url: String, - /// The ID of the registry that this schema belongs to. - pub registry_id: String, + /// Schema URL that this file is or will be published at. 
+ pub schema_url: SchemaUrl, /// The registry that this schema belongs to. pub registry: Registry, /// Catalog of unique items that are shared across multiple registries @@ -79,7 +77,8 @@ pub struct ResolvedTelemetrySchema { #[serde(skip_serializing_if = "Option::is_none")] pub versions: Option, /// The manifest of the registry. - pub registry_manifest: Option, + #[serde(skip)] + pub manifest: Option, } /// Statistics on a resolved telemetry schema. @@ -94,11 +93,11 @@ pub struct Stats { impl ResolvedTelemetrySchema { /// Create a new resolved telemetry schema. - pub fn new>(schema_url: S, registry_id: S, registry_url: S) -> Self { + pub fn new>(schema_url: S, registry_url: S) -> Self { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: schema_url.as_ref().to_owned(), - registry_id: registry_id.as_ref().to_owned(), + // TODO: is it correct? + schema_url: SchemaUrl(schema_url.as_ref().to_owned()), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, @@ -539,7 +538,7 @@ mod tests { #[test] fn no_diff() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "group1", [ @@ -556,7 +555,7 @@ mod tests { #[test] fn detect_2_added_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -565,7 +564,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -584,7 +583,7 @@ mod tests { #[test] fn detect_2_deprecated_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); 
prior_schema.add_attribute_group( "registry.group1", [ @@ -598,7 +597,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -645,7 +644,7 @@ mod tests { #[test] fn detect_2_renamed_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -659,7 +658,7 @@ mod tests { // 2 new attributes are added: attr2_bis and attr3_bis // attr2 is renamed attr2_bis // attr3 is renamed attr3_bis - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -693,7 +692,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_existing_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -705,7 +704,7 @@ mod tests { ); prior_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -732,7 +731,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_new_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -743,7 +742,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = 
ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -776,7 +775,7 @@ mod tests { /// However, detecting this case is useful for identifying a violation of the process. #[test] fn detect_2_removed_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -787,7 +786,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -805,9 +804,9 @@ mod tests { // TODO add many more group diff checks for various capabilities. #[test] fn detect_metric_name_change() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "test/base_version", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); prior_schema.add_metric_group("metrics.cpu.time", "cpu.time", [], None); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "test/new_version", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); latest_schema.add_metric_group( "metrics.cpu.time", "cpu.time", diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 28e2d7cc1..7acc570e4 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::RegistryManifest, + manifest::{RegistryManifest, SchemaUrl}, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -50,9 +50,7 @@ pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, /// Schema URL that this file is published at. 
- pub schema_url: String, - /// The ID of the registry that this schema belongs to. - pub registry_id: String, + pub schema_url: SchemaUrl, /// Catalog of attributes. Note: this will include duplicates for the same key. pub attribute_catalog: Vec, /// The registry that this schema belongs to. @@ -60,8 +58,8 @@ pub struct ResolvedTelemetrySchema { /// Refinements for the registry pub refinements: Refinements, /// The manifest of the registry. - #[serde(skip_serializing)] - pub registry_manifest: Option, + #[serde(skip)] + pub manifest: Option, } impl ResolvedTelemetrySchema { @@ -131,7 +129,6 @@ impl TryFrom for ResolvedTelemetrySchema { Ok(ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), schema_url: value.schema_url, - registry_id: value.registry_id, attribute_catalog, registry, refinements, @@ -505,7 +502,6 @@ pub fn convert_v1_to_v2( } let v2_registry = Registry { - registry_url: r.registry_url, attributes, spans, metrics, @@ -989,11 +985,10 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "my.schema.url".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "my.schema.url".to_owned(), + registry_url: "http://test/schemas/1.0".to_owned(), groups: vec![], }, instrumentation_library: None, @@ -1007,8 +1002,10 @@ mod tests { assert!(v2_schema.is_ok()); let v2_schema = v2_schema.unwrap(); assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); - assert_eq!(v2_schema.schema_url, "my.schema.url"); - assert_eq!(v2_schema.registry_id, "my-registry"); + assert_eq!( + v2_schema.schema_url, + SchemaUrl("http://test/schemas/1.0.0".to_owned()) + ); } #[test] @@ -1216,13 +1213,11 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema 
{ file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "my.schema.url".to_owned(), - registry_id: "main".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0".to_owned()), attribute_catalog: vec![], registry: Registry { attributes: vec![], attribute_groups: vec![], - registry_url: "todo".to_owned(), spans: vec![], metrics: vec![], events: vec![], diff --git a/crates/weaver_resolved_schema/src/v2/registry.rs b/crates/weaver_resolved_schema/src/v2/registry.rs index bdea11ce1..147977427 100644 --- a/crates/weaver_resolved_schema/src/v2/registry.rs +++ b/crates/weaver_resolved_schema/src/v2/registry.rs @@ -35,11 +35,6 @@ pub struct Registry { /// Catalog of (public) attribute groups. pub attribute_groups: Vec, - /// The semantic convention registry url. - /// - /// This is the base URL, under which this registry can be found. - pub registry_url: String, - /// A list of span signal definitions. pub spans: Vec, @@ -267,7 +262,6 @@ mod test { }]; let registry = Registry { attribute_groups: vec![], - registry_url: "https://opentelemetry.io/schemas/1.23.0".to_owned(), spans: vec![Span { r#type: "test.span".to_owned().into(), kind: SpanKindSpec::Client, diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml index 0fcd0ce52..8e5b52e46 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml @@ -3,5 +3,5 @@ description: Test registry A for circular dependency testing. 
semconv_version: 0.1.0 schema_base_url: https://example.com/registry_a/schemas/ dependencies: - - name: registry_b + - schema_url: https://example.com/registry_b/schemas/1.0.0 registry_path: data/circular-registry-test/registry_b \ No newline at end of file diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index d09a81eb1..f50389496 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -3,5 +3,7 @@ description: Test registry B for circular dependency testing. semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - - name: registry_a + # TODO: support legacy name-based dependencies as well (with warning) + #- name: registry_a + - schema_url: https://example.com/registry_a/schemas/1.0.0 registry_path: data/circular-registry-test/registry_a \ No newline at end of file diff --git a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml index d9cf26bc9..be13985a8 100644 --- a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the App. 
semconv_version: 0.1.0 schema_base_url: https://app.com/schemas/ dependencies: - - name: acme + - schema_url: https://acme.com/schemas/0.1.0 registry_path: data/multi-registry/custom_registry diff --git a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml index 711eb37f4..8e0a5081b 100644 --- a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: data/multi-registry/otel_registry diff --git a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json index 0f2def153..e0b0e9af3 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json +++ b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json @@ -58,12 +58,12 @@ "name": "my-span", "lineage": { "provenance": { - "registry_id": "acme", + "registry_id": "acme.com/schemas", "path": "data/registry-test-published-1/registry/main.yaml" }, "attributes": { "a": { - "source_group": "v2_dependency.published", + "source_group": "v2_dependency.opentelemetry.io/schemas", "inherited_fields": [ "annotations", "brief", diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml index 189cdb46f..96103a4f1 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml +++ 
b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml @@ -1,13 +1,11 @@ file_format: resolved/2.0.0 -schema_url: http://todo -registry_id: published +schema_url: https://opentelemetry.io/schemas/1.0.0 attribute_catalog: - key: a type: string brief: test a stability: stable registry: - registry_url: todo-why? attributes: - 0 attribute_groups: diff --git a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml index ad5d44275..d48e2ab4e 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: published + - schema_url: https://example.com/schemas/1.2.3 registry_path: data/registry-test-published-1/published diff --git a/crates/weaver_resolver/src/attribute.rs b/crates/weaver_resolver/src/attribute.rs index 99f782b3f..908101c7f 100644 --- a/crates/weaver_resolver/src/attribute.rs +++ b/crates/weaver_resolver/src/attribute.rs @@ -321,7 +321,7 @@ impl AttributeLookup for V1Schema { impl AttributeLookup for V2Schema { fn lookup_attribute(&self, key: &str) -> Option { - let fake_group_id = format!("v2_dependency.{}", self.registry_id); + let fake_group_id = format!("v2_dependency.{}", self.schema_url.name()); self.attribute_catalog.iter().find_map(|attr| { if attr.key == key { Some(AttributeWithGroupId { diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 24ead7e59..7345d14ba 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -167,7 +167,7 @@ impl ImportableDependency for V2Schema { for ar in 
m.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -214,7 +214,7 @@ impl ImportableDependency for V2Schema { for ar in e.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -262,7 +262,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -276,7 +276,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -439,6 +439,7 @@ mod tests { use itertools::Itertools; use std::error::Error; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; + use weaver_semconv::manifest::SchemaUrl; use crate::dependency::{ResolvedDependency, UnresolvedAttributeLookup}; @@ -470,8 +471,7 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: "v1-example".to_owned(), - registry_id: "v1-example".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: vec![weaver_resolved_schema::registry::Group { diff --git a/crates/weaver_resolver/src/error.rs b/crates/weaver_resolver/src/error.rs index f2e01eb11..e8d2add5a 
100644 --- a/crates/weaver_resolver/src/error.rs +++ b/crates/weaver_resolver/src/error.rs @@ -19,21 +19,26 @@ pub enum Error { FailToResolveDefinition(#[from] weaver_semconv::Error), /// We discovered a circular dependency we cannot resolve. - #[error("Circular dependency detected: registry '{registry_id}' depends on itself through the chain: {chain}")] + #[error("Circular dependency detected: registry '{registry_name}' depends on itself through the chain: {chain}")] CircularDependency { /// The registry that depends on itself. - registry_id: String, + registry_name: String, + /// A string representing the dependency chain. chain: String, }, /// We've reached the maximum dependency depth for this registry. - #[error("Maximum dependency depth reached for registry `{registry}`. Cannot load further dependencies.")] + #[error("Maximum dependency depth reached for registry `{registry_name}`. Cannot load further dependencies.")] MaximumDependencyDepth { /// The registry which has too many dependencies. - registry: String, + registry_name: String, }, + /// Failed to resolve the schema URL for a registry. + #[error("Schema URL is missing in the manifest and cannot be constructed from the registry name and version.")] + FailToResolveSchemaUrl {}, + /// An invalid URL. #[error("Invalid URL `{url:?}`, error: {error:?})")] #[diagnostic(help("Check the URL and try again."))] @@ -168,10 +173,12 @@ pub enum Error { }, /// We - #[error("Invalid registry: {registry_id}. Unable to find attribute by index: {attribute_ref}")] + #[error( + "Invalid registry: {registry_name}. Unable to find attribute by index: {attribute_ref}" + )] InvalidRegistryAttributeRef { /// The registry with the issue. - registry_id: String, + registry_name: String, /// The attribute index that does not exist in the registry. 
attribute_ref: u32, }, diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 940dc104a..587e8ddd9 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,6 +3,7 @@ #![doc = include_str!("../README.md")] use weaver_semconv::group::ImportsWithProvenance; +use weaver_semconv::manifest::SchemaUrl; use crate::attribute::AttributeCatalog; use crate::dependency::ResolvedDependency; @@ -88,8 +89,20 @@ impl SchemaResolver { WResult::FatalErr(e) => return WResult::FatalErr(e), } } - let registry_id: String = repo.id().to_string(); let manifest = repo.manifest().cloned(); + let schema_url = if let Some(m) = manifest.as_ref() { + match m.schema_url.clone() { + Some(url) => url, + None => { + return WResult::FatalErr(Error::FailToResolveSchemaUrl {}); + } + } + } else { + match SchemaUrl::from_name_version(&repo.name(), &repo.version()) { + Ok(url) => url, + Err(_) => return WResult::FatalErr(Error::FailToResolveSchemaUrl {}), + } + }; let mut attr_catalog = AttributeCatalog::default(); // TODO - Do something with non_fatal_errors if we need to. 
resolve_registry_with_dependencies( @@ -105,8 +118,7 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: "".to_owned(), - registry_id, + schema_url: schema_url, registry: resolved_registry, catalog, resource: None, @@ -235,7 +247,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -249,7 +261,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, None, ®istry_path)?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { @@ -263,19 +275,22 @@ mod tests { ); // Verify we have specs from all three registries - let registry_ids = loaded.registry_ids(); + let registry_names = loaded.registry_names(); assert!( - registry_ids.contains(&"app".to_owned()), - "Missing app registry specs" + registry_names.contains(&"app.com/schemas".to_owned()), + "Missing app registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"acme".to_owned()), - "Missing acme registry specs" + registry_names.contains(&"acme.com/schemas".to_owned()), + "Missing acme registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"otel".to_owned()), - "Missing otel registry specs" + registry_names.contains(&"opentelemetry.io/schemas".to_owned()), + "Missing otel registry specs, available registries: {:?}", + registry_names ); // Now test the resolved 
registry content diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 7d1e8af29..34190c5d0 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -14,7 +14,7 @@ use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; use weaver_semconv::json_schema::JsonSchemaValidator; -use weaver_semconv::registry_repo::{LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST, RegistryRepo}; +use weaver_semconv::registry_repo::{RegistryRepo, LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; use crate::Error; @@ -48,9 +48,11 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new("default", &path).map_err(|e| Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), + let repo = RegistryRepo::try_new(Some("default"), Some("1.0.0"), &path).map_err(|e| { + Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), + } })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) @@ -79,8 +81,8 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.0, + LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.0, } } @@ -104,19 +106,19 @@ impl LoadedSemconvRegistry { /// Returns all the registry ids in this loaded registry and its dependencies. #[cfg(test)] #[must_use] - pub fn registry_ids(&self) -> Vec { + pub fn registry_names(&self) -> Vec { match self { LoadedSemconvRegistry::Unresolved { repo, dependencies, .. } => { - let mut result = vec![repo.id().to_string()]; + let mut result = vec![repo.name().to_string()]; for d in dependencies { - result.extend(d.registry_ids()); + result.extend(d.registry_names()); } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.clone()], - LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.registry_id.clone()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().clone()], + LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().clone()], } } } @@ -132,11 +134,11 @@ impl Display for LoadedSemconvRegistry { } => write!( f, "{} - [{}]", - repo.id(), + repo.schema_url(), dependencies.iter().map(|d| format!("{d}")).join(",") ), - LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.registry_id), - LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.registry_id), + LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.schema_url), + LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.schema_url), } } } @@ -173,22 +175,22 @@ fn load_semconv_repository_recursive( // Make sure we don't go past our max dependency depth. 
if max_dependency_depth == 0 { return WResult::FatalErr(Error::MaximumDependencyDepth { - registry: registry_repo.registry_path_repr().to_owned(), + registry_name: registry_repo.registry_path_repr().to_owned(), }); } - let registry_id = registry_repo.id().to_string(); + let registry_name = registry_repo.name().to_string(); // Check for circular dependency - if visited_registries.contains(®istry_id) { - dependency_chain.push(registry_id.clone()); + if visited_registries.contains(®istry_name) { + dependency_chain.push(registry_name.clone()); let chain_str = dependency_chain.join(" → "); return WResult::FatalErr(Error::CircularDependency { - registry_id, + registry_name: registry_name.clone(), chain: chain_str, }); } // Add current registry to visited set and dependency chain - let _ = visited_registries.insert(registry_id.clone()); - dependency_chain.push(registry_id.clone()); + let _ = visited_registries.insert(registry_name.clone()); + dependency_chain.push(registry_name.clone()); // Either load a fully resolved repository, or read in raw files. if let Some(manifest) = registry_repo.manifest() { @@ -202,7 +204,18 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new(&d.name, &d.registry_path) { + let registry_path = d.registry_path.clone().unwrap_or_else(|| { + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: d.schema_url.to_string(), + sub_folder: None, + } + }); + match RegistryRepo::try_new( + Some(&d.schema_url.name()), + Some(&d.schema_url.version()), + ®istry_path, + ) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. match load_semconv_repository_recursive( @@ -305,7 +318,7 @@ fn load_definition_repository( // TODO - less confusing way to load semconv specs. 
vec![SemConvRegistry::semconv_spec_from_file( - ®istry_repo.id(), + ®istry_repo.name(), entry.path(), &unversioned_validator, &versioned_validator, @@ -398,7 +411,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -410,7 +423,7 @@ mod tests { dependencies, } = loaded { - assert_eq!("acme", repo.id().as_ref()); + assert_eq!("acme.com/schemas", repo.name().as_ref()); assert_eq!(dependencies.len(), 1); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 1); @@ -421,7 +434,7 @@ mod tests { dependencies, }] = &dependencies.as_slice() { - assert_eq!("otel", repo.id().as_ref()); + assert_eq!("opentelemetry.io/schemas", repo.name().as_ref()); assert_eq!(dependencies.len(), 0); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 0); @@ -440,7 +453,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("app"), Some("1.0.0"), ®istry_path)?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -475,7 +488,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new("registry_a", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("registry_a"), Some(""), ®istry_path)?; let result = load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/registry.rs 
b/crates/weaver_resolver/src/registry.rs index 3b6ae70a3..e1efbf897 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -918,12 +918,14 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let registry_id = "default"; + let registry_name = "default"; + let registry_version = "0.1.0"; let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(registry_id, &location).expect("Failed to load registry"), + RegistryRepo::try_new(Some(registry_name), Some(registry_version), &location) + .expect("Failed to load registry"), true, ) .ignore(|e| { @@ -1105,6 +1107,7 @@ groups: #[test] fn test_api_usage() -> Result<(), Box> { let registry_id = "local"; + let registry_version = "1.0.0"; // Load a semantic convention registry from a local directory. 
// Note: A method is also available to load a registry from a git @@ -1113,7 +1116,8 @@ groups: let path = VirtualDirectoryPath::LocalFolder { path: "data/registry-test-7-spans/registry".to_owned(), }; - let repo = RegistryRepo::try_new(registry_id, &path)?; + + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 8dcd39eb7..70a2a1ace 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -478,6 +478,7 @@ mod tests { use weaver_semconv::attribute::AttributeType; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; @@ -543,7 +544,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 548d54cf9..f3f82721f 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -12,11 +12,94 @@ use crate::stability::Stability; use crate::Error; use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::path::PathBuf; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; +/// Represents the schema URL of a registry, which serves as a unique 
identifier for the registry +/// along with its version. +#[derive(Debug, Clone, PartialEq, Eq, Hash, JsonSchema)] +pub struct SchemaUrl(pub String); + +impl SchemaUrl { + /// Validate the schema URL format. + pub fn validate(&self) -> Result<(), String> { + let parsed = url::Url::parse(&self.0).map_err(|e| format!("Invalid schema URL: {e}"))?; + if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + return Err("The schema URL must have at least one path segment.".to_owned()); + } + Ok(()) + } + + /// Returns the registry name, derived from the schema URL. + pub fn name(&self) -> String { + let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + + if segments.is_empty() { + return parsed_url.authority().to_string(); + } + + format!("{}/{}", parsed_url.authority(), segments.join("/")) + } + + /// Returns the registry version, derived from the schema URL. + pub fn version(&self) -> String { + let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } + + /// Create a SchemaUrl from name and version. 
+ pub fn from_name_version(name: &str, version: &str) -> Result { + let schema_url_str; + // TODO: replace with scheme regex + if name.starts_with("http://") || name.starts_with("https://") { + schema_url_str = format!("{}/{}", name.trim_end_matches('/'), version); + } else { + schema_url_str = format!("https://{}/{}", name.trim_end_matches('/'), version); + } + let schema_url = SchemaUrl(schema_url_str); + schema_url.validate()?; + Ok(schema_url) + } +} + +impl std::fmt::Display for SchemaUrl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl<'de> Deserialize<'de> for SchemaUrl { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Ok(SchemaUrl(s)) + } +} + +impl Serialize for SchemaUrl { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.0) + } +} + /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema @@ -34,7 +117,7 @@ pub struct RegistryManifest { /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: /// `http[s]://server[:port]/path/`. /// See https://github.com/open-telemetry/opentelemetry-specification/blob/v1.53.0/specification/schemas/README.md#schema-url for more details. - pub schema_url: Option, + pub schema_url: Option, /// An optional description of the registry. /// @@ -73,17 +156,25 @@ pub struct RegistryManifest { pub resolved_schema_uri: Option, } -/// Represents a dependency of a semantic convention registry. +/// Represents a dependency of a semantic convention registry as defined in YAML. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] pub struct Dependency { - /// The name of the dependency. - pub name: String, - /// The path to the dependency. + /// The schema URL for the dependency (required). 
+ /// It must follow OTel schema URL format, which is: `http[s]://server[:port]/path/`. + /// This is not necessarily the URL registry can be accessed at, but it provides + /// a unique identifier for the dependency registry and its version. /// + /// When registry is not published yet, this field should be populated with a placeholder URL, + /// but it must follow the URL format and include a version segment. + /// The actual registry files can be provided in `registry_path` field. + pub schema_url: SchemaUrl, + + /// The path to the dependency (optional). /// This can be either: /// - A manifest of a published registry /// - A directory containing the raw definition. - pub registry_path: VirtualDirectoryPath, + #[serde(skip_serializing_if = "Option::is_none")] + pub registry_path: Option, } impl RegistryManifest { @@ -115,11 +206,16 @@ impl RegistryManifest { // If the schema URL is not provided, populate it using deprecated schema_base_url and semconv_version // validation would fail if they were not provided if manifest.schema_url.is_none() { - manifest.schema_url = Some(format!( - "{}/{}", - manifest.schema_base_url.clone().unwrap_or_default(), - manifest.semconv_version.clone().unwrap_or_default() - )); + manifest.schema_url = Some( + SchemaUrl::from_name_version( + &manifest.schema_base_url.clone().unwrap_or_default(), + &manifest.semconv_version.clone().unwrap_or_default(), + ) + .map_err(|e| InvalidRegistryManifest { + path: manifest_path_buf.clone(), + error: e, + })?, + ); } Ok(manifest) @@ -128,43 +224,52 @@ impl RegistryManifest { fn validate(&self, path: PathBuf) -> Result<(), Error> { let mut errors = vec![]; - let schema_url_empty = self.schema_url.as_ref().map_or(true, |url| url.is_empty()); - let schema_base_url_empty = self.schema_base_url.as_ref().map_or(true, |url| url.is_empty()); - let semconv_version_empty = self.semconv_version.as_ref().map_or(true, |v| v.is_empty()); - - if schema_url_empty { - if schema_base_url_empty || 
semconv_version_empty { + if self.schema_url.is_none() { + if self.schema_base_url.is_none() || self.semconv_version.is_none() { errors.push(InvalidRegistryManifest { path: path.clone(), error: "The registry schema URL is required.".to_owned(), }); } else { - // schema_base_url should be a valid absolute URL, otherwise push an error to the list. - if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { + if self + .schema_base_url + .as_ref() + .map_or(true, |url| url.is_empty()) + { errors.push(InvalidRegistryManifest { path: path.clone(), - error: format!("Invalid schema base URL: {}", e), + error: "The registry schema base URL is required.".to_owned(), }); - } - } - } else { - // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment - match url::Url::parse(self.schema_url.as_ref().unwrap()) { - Ok(parsed_url) => { - if parsed_url.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + } else { + if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { errors.push(InvalidRegistryManifest { path: path.clone(), - error: "The registry schema URL must have at least one path segment.".to_owned(), + error: format!("Invalid schema base URL: {}", e), }); } } - Err(e) => { + + if self + .semconv_version + .as_ref() + .map_or(true, |version| version.is_empty()) + { errors.push(InvalidRegistryManifest { path: path.clone(), - error: format!("Invalid schema URL: {}", e), + error: "The registry version is required.".to_owned(), }); } } + } else { + // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment + if let Some(url) = self.schema_url.as_ref() { + url.validate().unwrap_or_else(|e| { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema URL: {}", e), + }); + }); + } } handle_errors(errors)?; @@ -175,29 +280,21 @@ impl RegistryManifest { /// For example, if the schema URL is 
`https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` pub fn name(&self) -> String { - let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); - let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); - let authority = parsed_url.host_str().unwrap_or_default(); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } - format!("{}/{}", authority, segments.join("/")) + self.schema_url + .as_ref() + .map(|url| url.name()) + .unwrap_or_default() + .to_string() } /// Returns the registry version, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry version would be `1.0.0` pub fn version(&self) -> String { - let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); - let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") + self.schema_url + .as_ref() + .map(|url| url.version()) + .unwrap_or_default() .to_string() } } @@ -230,7 +327,7 @@ mod tests { let config = RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") .expect("Failed to load the registry configuration file."); - assert_eq!(config.name(), "vendor_acme"); + assert_eq!(config.name(), "acme.com/schemas"); assert_eq!(config.version(), "0.1.0"); } @@ -244,16 +341,12 @@ mod tests { let expected_errs = CompoundError(vec![ InvalidRegistryManifest { path: path.clone(), - error: "The registry name is required.".to_owned(), + error: "The registry schema base URL is required.".to_owned(), }, InvalidRegistryManifest { path: path.clone(), error: "The registry version is required.".to_owned(), }, - InvalidRegistryManifest { - path: 
path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }, ]); if let Err(observed_errs) = result { diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 7e33fa5b2..11251cd95 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -125,7 +125,7 @@ impl SemConvRegistry { LazyLock::new(|| Regex::new(r".*(v\d+\.\d+\.\d+).*").expect("Invalid regex")); // Load all the semantic convention registry. - let mut registry = SemConvRegistry::new(registry_repo.id().as_ref()); + let mut registry = SemConvRegistry::new(registry_repo.name().as_ref()); for spec in semconv_specs { registry.add_semconv_spec(spec); @@ -145,9 +145,16 @@ impl SemConvRegistry { registry.set_manifest(RegistryManifest { file_format: None, - schema_url: registry_repo.manifest().and_then(|m| Some(m.schema_url.clone())).unwrap_or_default(), - schema_base_url: registry_repo.manifest().and_then(|m| m.schema_base_url.clone()), - semconv_version: registry_repo.manifest().and_then(|m| m.semconv_version.clone()), + schema_url: registry_repo + .manifest() + .and_then(|m| Some(m.schema_url.clone())) + .unwrap_or_default(), + schema_base_url: registry_repo + .manifest() + .and_then(|m| m.schema_base_url.clone()), + semconv_version: registry_repo + .manifest() + .and_then(|m| m.semconv_version.clone()), description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], resolved_schema_uri: None, @@ -386,7 +393,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = RegistryRepo::try_new("test", ®istry_path).unwrap(); + let registry_repo = + RegistryRepo::try_new(Some("test"), Some("1.0.0"), ®istry_path).unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git 
a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index c91f89d57..8104b79bc 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -6,15 +6,13 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::RegistryManifest; +use crate::manifest::{RegistryManifest, SchemaUrl}; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; /// The name of the legacy registry manifest file. -#[deprecated( - note = "The registry manifest file is renamed to `manifest.yaml`." -)] +#[deprecated(note = "The registry manifest file is renamed to `manifest.yaml`.")] pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// The name of the registry manifest file. @@ -29,8 +27,11 @@ pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; /// that denotes where to find aspects of the registry. #[derive(Default, Debug, Clone)] pub struct RegistryRepo { - // A unique identifier for the registry (e.g. main, baseline, etc.) - id: Arc, + // A unique identifier for the registry (e.g. opentelemetry.io/schemas/sub-component). + name: Arc, + + // Registry version + version: Arc, // A virtual directory containing the registry. registry: VirtualDirectory, @@ -40,30 +41,73 @@ pub struct RegistryRepo { } impl RegistryRepo { - /// Creates a new `RegistryRepo` from a `RegistryPath` object that + /// Creates a new `RegistryRepo` from a name, version, and `RegistryPath` object that /// specifies the location of the registry. + /// If there is no manifest, name and version must be provided. 
pub fn try_new( - registry_id_if_no_manifest: &str, + name: Option<&str>, + version: Option<&str>, registry_path: &VirtualDirectoryPath, ) -> Result { - let mut registry_repo = Self { - id: Arc::from(registry_id_if_no_manifest), - registry: VirtualDirectory::try_new(registry_path) - .map_err(Error::VirtualDirectoryError)?, - manifest: None, - }; - if let Some(manifest) = registry_repo.manifest_path() { - let registry_manifest = RegistryManifest::try_from_file(manifest)?; - registry_repo.id = Arc::from(registry_manifest.name().as_str()); - registry_repo.manifest = Some(registry_manifest); + let registry = + VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; + let mut manifest = None; + let mut registry_name = None; + let mut registry_version = None; + // Try to load manifest + if let Some(manifest_path) = { + // We need a temporary RegistryRepo to call manifest_path + let temp_repo = Self { + name: Arc::from(""), + version: Arc::from(""), + registry: registry.clone(), + manifest: None, + }; + temp_repo.manifest_path() + } { + let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; + registry_name = Some(Arc::from(registry_manifest.name().as_str())); + registry_version = Some(Arc::from(registry_manifest.version().as_str())); + manifest = Some(registry_manifest); + } else { + // No manifest, require name and version + let name = name.ok_or_else(|| Error::InvalidRegistryManifest { + path: registry.path().to_path_buf(), + error: "Registry name must be provided if no manifest is present.".to_string(), + })?; + let version = version.ok_or_else(|| Error::InvalidRegistryManifest { + path: registry.path().to_path_buf(), + error: "Registry version must be provided if no manifest is present.".to_string(), + })?; + registry_name = Some(Arc::from(name)); + registry_version = Some(Arc::from(version)); + } + Ok(Self { + name: registry_name.unwrap(), + version: registry_version.unwrap(), + registry, + manifest, + }) + } + + /// Returns 
the registry name (from manifest if present, otherwise top-level field). + #[must_use] + pub fn name(&self) -> Arc { + if let Some(manifest) = &self.manifest { + Arc::from(manifest.name()) + } else { + self.name.clone() } - Ok(registry_repo) } - /// Returns the unique identifier for the registry. + /// Returns the registry version (from manifest if present, otherwise top-level field). #[must_use] - pub fn id(&self) -> Arc { - self.id.clone() + pub fn version(&self) -> Arc { + if let Some(manifest) = &self.manifest { + Arc::from(manifest.version()) + } else { + self.version.clone() + } } /// Returns the local path to the semconv registry. @@ -142,8 +186,14 @@ impl RegistryRepo { /// Returns the registry schema URL, if available in the manifest. #[must_use] - pub fn schema_url(&self) -> Option { - self.manifest.as_ref().and_then(|manifest| manifest.schema_url.clone()) + pub fn schema_url(&self) -> SchemaUrl { + // TODO: we should never have a registry without a schema URL at this point + // but not sure how to do it in terms of API design + // but for now we can just panic if we don't find a schema URL + self.manifest + .as_ref() + .and_then(|manifest| manifest.schema_url.clone()) + .expect("Schema URL must have been provided") } } @@ -167,7 +217,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new("main", ®istry_path).unwrap(); + let repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -186,8 +236,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test 
repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; @@ -208,8 +258,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", @@ -222,8 +272,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index c4fc26958..0c752664c 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 3.0.0 +schema_url: http://resolved/3.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index aa8518c7b..b65691d98 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. -version: 1.0.0 +schema_url: http://resolved/1.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 91bbc3a6b..d8bf526e8 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 2.0.0 +schema_url: http://resolved/2.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 diff --git a/crates/weaver_semconv_gen/data_v2/templates.md b/crates/weaver_semconv_gen/data_v2/templates.md index 28b87f7eb..1d9e54184 100644 --- a/crates/weaver_semconv_gen/data_v2/templates.md +++ b/crates/weaver_semconv_gen/data_v2/templates.md @@ -22,8 +22,8 @@ test.common Custom Snippet Name - -todo/1.0.0 + +https://todo/1.0.0 diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 7579d61a8..27239e420 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -198,7 +198,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; let generator = SnippetGenerator::try_from_registry_repo( ®istry_repo, template, diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index a83adf9a8..ae852bb2e 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -413,6 +413,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, PrimitiveOrArrayTypeSpec}, group::InstrumentSpec, + manifest::SchemaUrl, v2::{span::SpanName, CommonFields}, }; @@ -451,8 +452,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: "todo/1.0.0".to_owned(), - registry_id: "main".to_owned(), + schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -466,7 +466,6 @@ mod tests 
{ attributes: vec![AttributeRef(0)], common: CommonFields::default(), }], - registry_url: "todo".to_owned(), spans: vec![Span { r#type: "trace.test".to_owned().into(), kind: weaver_semconv::group::SpanKindSpec::Client, diff --git a/src/registry/check.rs b/src/registry/check.rs index 0f2a981da..427ee42ec 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,7 +43,7 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result>) -> impl Into let registry = &state.registry; let stats = RegistryStats { - registry_url: registry.registry_url.clone(), + schema_url: registry.schema_url.to_string(), counts: RegistryCounts { attributes: registry.registry.attributes.len(), metrics: registry.registry.metrics.len(), diff --git a/src/serve/types.rs b/src/serve/types.rs index ed894c948..b750b9f10 100644 --- a/src/serve/types.rs +++ b/src/serve/types.rs @@ -10,8 +10,8 @@ use weaver_semconv::stability::Stability; /// Registry stats response. #[derive(Debug, Serialize, ToSchema)] pub struct RegistryStats { - /// The registry URL. - pub registry_url: String, + /// The schema URL. + pub schema_url: String, /// Counts of different entity types. 
pub counts: RegistryCounts, // TODO: It would be better to serve the output of `weaver registry stats` here diff --git a/src/weaver.rs b/src/weaver.rs index 19f018bbe..ed1423629 100644 --- a/src/weaver.rs +++ b/src/weaver.rs @@ -58,7 +58,8 @@ impl<'a> WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = RegistryRepo::try_new("main", registry_path)?; + let main_registry_repo = + RegistryRepo::try_new(Some("unknown"), Some("unknown"), registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/custom_registry/registry_manifest.yaml b/tests/custom_registry/registry_manifest.yaml index fd749cc86..ced966aa3 100644 --- a/tests/custom_registry/registry_manifest.yaml +++ b/tests/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.30.0.zip[model] diff --git a/tests/registry_stats.rs b/tests/registry_stats.rs index a05ae06dd..32514720e 100644 --- a/tests/registry_stats.rs +++ b/tests/registry_stats.rs @@ -18,5 +18,9 @@ fn test_cli_interface() { .output() .expect("failed to execute process"); - assert!(output.status.success()); + assert!( + output.status.success(), + "Process did not exit successfully. 
Stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 2e09f3afd..da1824745 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -33,9 +33,14 @@ fn test_cli_interface() { sub_folder: Some(SEMCONV_REGISTRY_MODEL.to_owned()), refspec: None, }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path).unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + + let registry_name = "opentelemetry.io/schemas"; + let registry_version = "1.40.0"; + let registry_repo = + RegistryRepo::try_new(Some(registry_name), Some(registry_version), ®istry_path) + .unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() From ba91924e9c59a8df4000ddf98d8c64bcde07aed0 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 11:47:38 -0800 Subject: [PATCH 03/27] mostly ready --- crates/weaver_codegen_test/build.rs | 2 +- crates/weaver_forge/src/lib.rs | 16 +++--- crates/weaver_forge/src/v2/registry.rs | 6 +-- crates/weaver_resolved_schema/src/lib.rs | 6 +-- crates/weaver_resolved_schema/src/v2/mod.rs | 11 ++-- .../registry_b/registry_manifest.yaml | 5 +- crates/weaver_resolver/src/dependency.rs | 2 +- crates/weaver_resolver/src/lib.rs | 6 +-- crates/weaver_resolver/src/loader.rs | 21 ++------ crates/weaver_resolver/src/registry.rs | 12 ++--- crates/weaver_semconv/src/manifest.rs | 48 +++++++++++++++-- crates/weaver_semconv/src/registry.rs | 3 +- crates/weaver_semconv/src/registry_repo.rs | 52 +++++++++++-------- crates/weaver_semconv_gen/src/v1.rs | 2 +- crates/weaver_semconv_gen/src/v2.rs | 3 +- src/registry/check.rs | 2 +- src/registry/diff.rs | 5 +- src/registry/resolve.rs | 2 +- src/weaver.rs | 2 +- 
tests/resolution_process.rs | 7 ++- 20 files changed, 118 insertions(+), 95 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 8183376e2..5acd7ec84 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,7 +42,7 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path) + let registry_repo = RegistryRepo::try_new(None, ®istry_path) .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index f432ed2f2..44bd70069 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -841,6 +841,7 @@ mod tests { use weaver_common::vdir::VirtualDirectoryPath; use weaver_diff::diff_dir; use weaver_resolver::{LoadedSemconvRegistry, SchemaResolver}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::registry_repo::RegistryRepo; use crate::config::{ApplicationMode, CaseConvention, Params, TemplateConfig, WeaverConfig}; @@ -855,12 +856,11 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let registry_id = "default"; - let registry_version = "1.0.0"; + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // 
SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); @@ -1057,12 +1057,11 @@ mod tests { }); engine.target_config.templates = Some(templates); - let registry_id = "default"; let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let registry_version = "1.0.0"; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() @@ -1188,12 +1187,11 @@ mod tests { #[test] fn test_comment_format() { - let registry_id = "default"; let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let registry_version = "1.0.0"; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index cd219d683..b81d4e949 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -560,8 +560,7 @@ mod tests { common: CommonFields::default(), }, }], - }, - registry_manifest: None, + } }; let forge_registry = @@ -640,8 +639,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - }, - registry_manifest: None, + } }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index 655ddbb21..5d1d3b6f8 100644 --- 
a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -104,7 +104,7 @@ impl ResolvedTelemetrySchema { instrumentation_library: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, } } @@ -295,13 +295,13 @@ impl ResolvedTelemetrySchema { pub fn diff(&self, baseline_schema: &ResolvedTelemetrySchema) -> SchemaChanges { let mut changes = SchemaChanges::new(); - if let Some(ref manifest) = self.registry_manifest { + if let Some(ref manifest) = self.manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); } - if let Some(ref manifest) = baseline_schema.registry_manifest { + if let Some(ref manifest) = baseline_schema.manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 7acc570e4..23e2cdad4 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -57,9 +57,6 @@ pub struct ResolvedTelemetrySchema { pub registry: Registry, /// Refinements for the registry pub refinements: Refinements, - /// The manifest of the registry. 
- #[serde(skip)] - pub manifest: Option, } impl ResolvedTelemetrySchema { @@ -131,8 +128,7 @@ impl TryFrom for ResolvedTelemetrySchema { schema_url: value.schema_url, attribute_catalog, registry, - refinements, - registry_manifest: None, + refinements }) } } @@ -995,7 +991,7 @@ mod tests { resource: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, }; let v2_schema: Result = v1_schema.try_into(); @@ -1227,8 +1223,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - }, - registry_manifest: None, + } } } } diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index f50389496..f4d66d2e4 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -3,7 +3,6 @@ description: Test registry B for circular dependency testing. 
semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - # TODO: support legacy name-based dependencies as well (with warning) - #- name: registry_a - - schema_url: https://example.com/registry_a/schemas/1.0.0 + - name: registry_a + # schema_url: is not necessry here, we're using deprecated, but valid foe now `name` registry_path: data/circular-registry-test/registry_a \ No newline at end of file diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 7345d14ba..fda6d54a2 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -523,7 +523,7 @@ mod tests { instrumentation_library: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, } } } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 587e8ddd9..98952d291 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -125,7 +125,7 @@ impl SchemaResolver { instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! 
- registry_manifest: manifest, + manifest: manifest, } }) } @@ -247,7 +247,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -261,7 +261,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 34190c5d0..6317a0450 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -48,7 +48,7 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new(Some("default"), Some("1.0.0"), &path).map_err(|e| { + let repo = RegistryRepo::try_new(None, &path).map_err(|e| { Error::InvalidUrl { url: "test string".to_owned(), error: format!("{e}"), @@ -204,18 +204,7 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - let registry_path = d.registry_path.clone().unwrap_or_else(|| { - // If no registry path is provided, we assume it's the same as the parent registry. 
- VirtualDirectoryPath::RemoteArchive { - url: d.schema_url.to_string(), - sub_folder: None, - } - }); - match RegistryRepo::try_new( - Some(&d.schema_url.name()), - Some(&d.schema_url.version()), - ®istry_path, - ) { + match RegistryRepo::try_new_dependency(&d) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. match load_semconv_repository_recursive( @@ -411,7 +400,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -453,7 +442,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("app"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -488,7 +477,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("registry_a"), Some(""), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let result = load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index e1efbf897..c2278e745 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -846,6 +846,7 @@ pub(crate) fn cleanup_and_stabilize_catalog_and_registry( mod tests { use rand::rng; use 
rand::seq::SliceRandom; + use weaver_semconv::manifest::SchemaUrl; use std::cmp::Ordering; use std::collections::HashMap; use std::error::Error; @@ -918,13 +919,12 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let registry_name = "default"; - let registry_version = "0.1.0"; + let schema_url = Some(SchemaUrl("https://default/0.1.0".to_owned())); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(Some(registry_name), Some(registry_version), &location) + RegistryRepo::try_new(schema_url, &location) .expect("Failed to load registry"), true, ) @@ -1106,9 +1106,6 @@ groups: #[test] fn test_api_usage() -> Result<(), Box> { - let registry_id = "local"; - let registry_version = "1.0.0"; - // Load a semantic convention registry from a local directory. // Note: A method is also available to load a registry from a git // repository. @@ -1117,7 +1114,8 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path)?; + let schema_url = Some(SchemaUrl(format!("https://local/registry/1.0.0"))); + let repo = RegistryRepo::try_new(schema_url, &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index f3f82721f..f2a72eed9 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -128,14 +128,14 @@ pub struct RegistryManifest { pub description: Option, /// The version of the registry which will be used to define the semconv package version. 
- #[serde(skip_serializing_if = "Option::is_none", default)] + #[serde(default, skip_serializing)] #[deprecated( note = "The `version` field is deprecated. The registry version should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." )] pub semconv_version: Option, /// The base URL where the registry's schema files are hosted. - #[serde(skip_serializing_if = "Option::is_none", default)] + #[serde(default, skip_serializing)] #[deprecated( note = "The `schema_base_url` field is deprecated. The registry schema URL should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." )] @@ -157,7 +157,7 @@ pub struct RegistryManifest { } /// Represents a dependency of a semantic convention registry as defined in YAML. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Debug, Clone, JsonSchema)] pub struct Dependency { /// The schema URL for the dependency (required). /// It must follow OTel schema URL format, which is: `http[s]://server[:port]/path/`. @@ -175,6 +175,48 @@ pub struct Dependency { /// - A directory containing the raw definition. #[serde(skip_serializing_if = "Option::is_none")] pub registry_path: Option, + + /// This field is deprecated and should not be used. + /// The registry name should be derived from the `schema_url` field, + /// which serves as a unique identifier for the dependency registry + /// and includes registry version. + #[deprecated( + note = "The `name` field is deprecated. The registry name should be derived from the `schema_url` field, which serves as a unique identifier for the dependency registry." 
+ )] + #[serde(default, skip_serializing)] // we can read, but won't write this field + pub name: Option, +} + +impl<'de> Deserialize<'de> for Dependency { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct DependencyHelper { + name: Option, + schema_url: Option, + registry_path: Option, + } + + let helper = DependencyHelper::deserialize(deserializer)?; + + let schema_url = match (helper.schema_url, helper.name) { + (Some(url), _) => url, + (None, Some(name)) => SchemaUrl(format!("{}/unknown", name)), + (None, None) => { + return Err(serde::de::Error::custom( + "Either 'schema_url' or 'name' must be provided for a dependency" + )) + } + }; + + Ok(Dependency { + schema_url, + registry_path: helper.registry_path, + name: None, + }) + } } impl RegistryManifest { diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 11251cd95..2afbfb507 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -286,6 +286,7 @@ impl SemConvRegistry { mod tests { use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec}; use crate::group::{GroupSpec, GroupType}; + use crate::manifest::SchemaUrl; use crate::provenance::Provenance; use crate::registry::SemConvRegistry; use crate::registry_repo::RegistryRepo; @@ -394,7 +395,7 @@ mod tests { path: "data".to_owned(), }; let registry_repo = - RegistryRepo::try_new(Some("test"), Some("1.0.0"), ®istry_path).unwrap(); + RegistryRepo::try_new(Some(SchemaUrl("https://test/42".to_owned())), ®istry_path).unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 8104b79bc..2a5de63fe 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ 
b/crates/weaver_semconv/src/registry_repo.rs @@ -6,7 +6,7 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::{RegistryManifest, SchemaUrl}; +use crate::manifest::{Dependency, RegistryManifest, SchemaUrl}; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; @@ -41,12 +41,26 @@ pub struct RegistryRepo { } impl RegistryRepo { - /// Creates a new `RegistryRepo` from a name, version, and `RegistryPath` object that + /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. + pub fn try_new_dependency( + dependency: &Dependency, + ) -> Result { + let path = dependency.registry_path.clone().unwrap_or_else(|| { + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: dependency.schema_url.to_string(), + sub_folder: None, + } + }); + Self::try_new(Some(dependency.schema_url.clone()), &path) + } + + /// Creates a new `RegistryRepo` from a schema URL and `RegistryPath` object that /// specifies the location of the registry. - /// If there is no manifest, name and version must be provided. + /// If there is no manifest and schema URL is not provided, registry + /// name and version are set to "unknown". 
pub fn try_new( - name: Option<&str>, - version: Option<&str>, + schema_url: Option, registry_path: &VirtualDirectoryPath, ) -> Result { let registry = @@ -66,25 +80,17 @@ impl RegistryRepo { temp_repo.manifest_path() } { let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; - registry_name = Some(Arc::from(registry_manifest.name().as_str())); - registry_version = Some(Arc::from(registry_manifest.version().as_str())); + registry_name = Some(registry_manifest.name()); + registry_version = Some(registry_manifest.version()); manifest = Some(registry_manifest); } else { // No manifest, require name and version - let name = name.ok_or_else(|| Error::InvalidRegistryManifest { - path: registry.path().to_path_buf(), - error: "Registry name must be provided if no manifest is present.".to_string(), - })?; - let version = version.ok_or_else(|| Error::InvalidRegistryManifest { - path: registry.path().to_path_buf(), - error: "Registry version must be provided if no manifest is present.".to_string(), - })?; - registry_name = Some(Arc::from(name)); - registry_version = Some(Arc::from(version)); + registry_name = schema_url.as_ref().map(|url| url.name()).or(Some("unknown".to_owned())); + registry_version = schema_url.as_ref().map(|url| url.version()).or(Some("unknown".to_owned())); } Ok(Self { - name: registry_name.unwrap(), - version: registry_version.unwrap(), + name: registry_name.unwrap().into(), + version: registry_version.unwrap().into(), registry, manifest, }) @@ -217,7 +223,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path).unwrap(); + let repo = RegistryRepo::try_new(None, ®istry_path).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -236,7 +242,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: 
"tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); @@ -258,7 +264,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( @@ -272,7 +278,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 27239e420..4ce1e7943 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -198,7 +198,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let generator = SnippetGenerator::try_from_registry_repo( ®istry_repo, template, diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index ae852bb2e..47ea0369c 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -568,8 +568,7 @@ mod tests { common: CommonFields::default(), }, }], - }, - registry_manifest: None, + } } } } diff --git a/src/registry/check.rs b/src/registry/check.rs index 
427ee42ec..6e525798f 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,7 +43,7 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result WeaverEngine<'a> { ) -> Result { let registry_path = &self.registry_config.registry; let main_registry_repo = - RegistryRepo::try_new(Some("unknown"), Some("unknown"), registry_path)?; + RegistryRepo::try_new(None, registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index da1824745..050472f81 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -6,7 +6,7 @@ use miette::Diagnostic; use weaver_common::vdir::VirtualDirectoryPath; use weaver_resolver::SchemaResolver; -use weaver_semconv::registry_repo::RegistryRepo; +use weaver_semconv::{manifest::SchemaUrl, registry_repo::RegistryRepo}; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -34,10 +34,9 @@ fn test_cli_interface() { refspec: None, }; - let registry_name = "opentelemetry.io/schemas"; - let registry_version = "1.40.0"; + let schema_url = Some(SchemaUrl("https://opelemetry.io/schemas/1.40.0".to_owned())); let registry_repo = - RegistryRepo::try_new(Some(registry_name), Some(registry_version), ®istry_path) + RegistryRepo::try_new(schema_url, ®istry_path) .unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From a42f7c82047f09cd0f74c4e585871dd97f0e5406 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 13:16:33 -0800 Subject: [PATCH 04/27] cleanup --- crates/weaver_codegen_test/build.rs | 4 +- crates/weaver_emit/src/lib.rs | 2 +- crates/weaver_forge/src/lib.rs | 18 +-- crates/weaver_forge/src/v2/registry.rs | 8 +- crates/weaver_live_check/src/advice/mod.rs | 12 +- crates/weaver_live_check/src/lib.rs | 6 +- crates/weaver_live_check/src/live_checker.rs | 28 ++-- 
crates/weaver_mcp/src/service.rs | 4 +- crates/weaver_resolved_schema/src/lib.rs | 2 +- crates/weaver_resolved_schema/src/v2/mod.rs | 12 +- crates/weaver_resolver/src/dependency.rs | 20 +-- crates/weaver_resolver/src/lib.rs | 8 +- crates/weaver_resolver/src/loader.rs | 20 ++- crates/weaver_resolver/src/registry.rs | 9 +- crates/weaver_search/src/lib.rs | 2 +- crates/weaver_semconv/src/manifest.rs | 138 ++++++++++++------- crates/weaver_semconv/src/registry.rs | 28 ++-- crates/weaver_semconv/src/registry_repo.rs | 64 +++++---- crates/weaver_semconv_gen/src/v2.rs | 4 +- src/registry/diff.rs | 8 +- src/registry/live_check.rs | 2 +- src/registry/resolve.rs | 3 +- src/weaver.rs | 3 +- tests/resolution_process.rs | 10 +- 24 files changed, 228 insertions(+), 187 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 5acd7ec84..584219f08 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path) - .unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = + RegistryRepo::try_new(None, ®istry_path).unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index 1c8477d0f..c00c0b070 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -591,7 +591,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![], attribute_groups: vec![], diff 
--git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 44bd70069..ba2f59a0d 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -856,12 +856,12 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1060,9 +1060,9 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1190,9 +1190,9 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let schema_url = 
Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index b81d4e949..2db318af5 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -448,7 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -560,7 +560,7 @@ mod tests { common: CommonFields::default(), }, }], - } + }, }; let forge_registry = @@ -610,7 +610,7 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), attribute_catalog: vec![], registry: v2::registry::Registry { attributes: vec![], // No attributes - This is the logic bug. 
@@ -639,7 +639,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - } + }, }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_live_check/src/advice/mod.rs b/crates/weaver_live_check/src/advice/mod.rs index 3daf3128a..3fdca2cf5 100644 --- a/crates/weaver_live_check/src/advice/mod.rs +++ b/crates/weaver_live_check/src/advice/mod.rs @@ -161,7 +161,7 @@ mod tests { // Test DeprecatedAdvisor let mut deprecated_advisor = DeprecatedAdvisor; - let deprecated_attr = Rc::new(VersionedAttribute::V1(Attribute { + let deprecated_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "deprecated.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -179,7 +179,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let sample_attr = create_sample_attribute("deprecated.attr"); let sample = Sample::Attribute(sample_attr.clone()); @@ -198,7 +198,7 @@ mod tests { // Test TypeAdvisor let mut type_advisor = TypeAdvisor; - let int_attr = Rc::new(VersionedAttribute::V1(Attribute { + let int_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "int.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Int), @@ -214,7 +214,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let mut sample_attr = create_sample_attribute("int.attr"); sample_attr.r#type = Some(PrimitiveOrArrayTypeSpec::String); @@ -236,7 +236,7 @@ mod tests { // Test StabilityAdvisor let mut stability_advisor = StabilityAdvisor; - let dev_attr = Rc::new(VersionedAttribute::V1(Attribute { + let dev_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "dev.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: 
PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -252,7 +252,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let sample_attr = create_sample_attribute("dev.attr"); let sample = Sample::Attribute(sample_attr.clone()); diff --git a/crates/weaver_live_check/src/lib.rs b/crates/weaver_live_check/src/lib.rs index 6ea0018fb..23c8d577a 100644 --- a/crates/weaver_live_check/src/lib.rs +++ b/crates/weaver_live_check/src/lib.rs @@ -118,7 +118,7 @@ pub enum VersionedRegistry { /// v1 ResolvedRegistry V1(ResolvedRegistry), /// v2 ForgeResolvedRegistry - V2(ForgeResolvedRegistry), + V2(Box), } /// Versioned enum for the attribute @@ -126,9 +126,9 @@ pub enum VersionedRegistry { #[serde(untagged)] pub enum VersionedAttribute { /// v1 Attribute - V1(weaver_resolved_schema::attribute::Attribute), + V1(Box), /// v2 Attribute - V2(weaver_forge::v2::attribute::Attribute), + V2(Box), } impl VersionedAttribute { diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index f73b47e00..c7f960da5 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -64,7 +64,7 @@ impl LiveChecker { } } for attribute in &group.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V1(attribute.clone())); + let attribute_rc = Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -92,7 +92,7 @@ impl LiveChecker { let _ = semconv_events.insert(event_name, event_rc); } for attribute in ®istry.registry.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V2(attribute.clone())); + let attribute_rc = Rc::new(VersionedAttribute::V2(Box::new(attribute.clone()))); match &attribute.r#type { AttributeType::Template(_) => { templates_by_length.push((attribute.key.clone(), attribute_rc.clone())); @@ -510,8 +510,8 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if 
use_v2 { - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ V2Attribute { @@ -612,7 +612,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST".to_owned(), @@ -796,8 +796,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -845,7 +845,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST_METRICS".to_owned(), @@ -1004,8 +1004,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1040,7 +1040,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST".to_owned(), @@ -1518,8 +1518,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: 
vec![], @@ -1593,7 +1593,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST_EVENTS".to_owned(), diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index ea4fc2617..9850dbd6d 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,7 +58,7 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(®istry)); // Create versioned registry wrapper once for live check - let versioned_registry = Arc::new(VersionedRegistry::V2((*registry).clone())); + let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); Self { search_context, @@ -393,7 +393,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index 5d1d3b6f8..f16385777 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -97,7 +97,7 @@ impl ResolvedTelemetrySchema { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), // TODO: is it correct? 
- schema_url: SchemaUrl(schema_url.as_ref().to_owned()), + schema_url: SchemaUrl::new(schema_url.as_ref().to_owned()), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 23e2cdad4..e4a83d845 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::{RegistryManifest, SchemaUrl}, + manifest::SchemaUrl, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -128,7 +128,7 @@ impl TryFrom for ResolvedTelemetrySchema { schema_url: value.schema_url, attribute_catalog, registry, - refinements + refinements, }) } } @@ -981,7 +981,7 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { registry_url: "http://test/schemas/1.0".to_owned(), @@ -1000,7 +1000,7 @@ mod tests { assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); assert_eq!( v2_schema.schema_url, - SchemaUrl("http://test/schemas/1.0.0".to_owned()) + SchemaUrl::new("http://test/schemas/1.0.0".to_owned()) ); } @@ -1209,7 +1209,7 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0".to_owned()), attribute_catalog: vec![], registry: Registry { attributes: vec![], @@ -1223,7 +1223,7 @@ mod tests { spans: vec![], metrics: vec![], 
events: vec![], - } + }, } } } diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index fda6d54a2..f6b5bb8da 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -19,9 +19,9 @@ use crate::{attribute::AttributeCatalog, Error}; #[derive(Debug, Deserialize)] pub(crate) enum ResolvedDependency { /// A V1 Dependency - V1(V1Schema), + V1(Box), // A V2 Dependency - V2(V2Schema), + V2(Box), } impl ResolvedDependency { @@ -167,7 +167,7 @@ impl ImportableDependency for V2Schema { for ar in m.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -214,7 +214,7 @@ impl ImportableDependency for V2Schema { for ar in e.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -262,7 +262,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -276,7 +276,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. 
let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -411,13 +411,13 @@ impl UnresolvedAttributeLookup for Vec { impl From for ResolvedDependency { fn from(value: V1Schema) -> Self { - ResolvedDependency::V1(value) + ResolvedDependency::V1(Box::new(value)) } } impl From for ResolvedDependency { fn from(value: V2Schema) -> Self { - ResolvedDependency::V2(value) + ResolvedDependency::V2(Box::new(value)) } } @@ -445,7 +445,7 @@ mod tests { #[test] fn test_lookup_group_attributes() -> Result<(), Box> { - let d = ResolvedDependency::V1(example_v1_schema()); + let d = ResolvedDependency::V1(Box::new(example_v1_schema())); let result = d.lookup_group_attributes("a"); assert!( result.is_some(), @@ -471,7 +471,7 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: vec![weaver_resolved_schema::registry::Group { diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 98952d291..97c3b665f 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -43,7 +43,7 @@ impl SchemaResolver { dependencies, } => Self::resolve_registry(repo, specs, imports, dependencies, include_unreferenced), LoadedSemconvRegistry::Resolved(resolved_telemetry_schema) => { - WResult::Ok(resolved_telemetry_schema) + WResult::Ok(*resolved_telemetry_schema) } LoadedSemconvRegistry::ResolvedV2(_) => { todo!("Converting V2 schema back into V1 is unsupported") @@ -69,7 +69,7 @@ impl SchemaResolver { .push(Self::resolve(d, include_unreferenced).map(|s| s.into())); } 
LoadedSemconvRegistry::Resolved(schema) => { - opt_resolved_dependencies.push(WResult::Ok(schema.into())); + opt_resolved_dependencies.push(WResult::Ok((*schema).into())); } LoadedSemconvRegistry::ResolvedV2(schema) => { opt_resolved_dependencies.push(WResult::Ok(schema.into())); @@ -118,14 +118,14 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: schema_url, + schema_url, registry: resolved_registry, catalog, resource: None, instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! - manifest: manifest, + manifest, } }) } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 6317a0450..a73843ca3 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -36,7 +36,7 @@ pub enum LoadedSemconvRegistry { dependencies: Vec, }, /// The semconv repository is already resolved and can be used as-is. - Resolved(V1Schema), + Resolved(Box), /// The semconv repository is already resolved and can be used as-is. ResolvedV2(V2Schema), } @@ -48,11 +48,9 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new(None, &path).map_err(|e| { - Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), - } + let repo = RegistryRepo::try_new(None, &path).map_err(|e| Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) @@ -81,8 +79,8 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.0, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.0, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.url, + LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.url, } } @@ -117,8 +115,8 @@ impl LoadedSemconvRegistry { } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().clone()], - LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().clone()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().to_owned()], + LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().to_owned()], } } } @@ -204,7 +202,7 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new_dependency(&d) { + match RegistryRepo::try_new_dependency(d) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. 
match load_semconv_repository_recursive( diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index c2278e745..3916363bf 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -846,12 +846,12 @@ pub(crate) fn cleanup_and_stabilize_catalog_and_registry( mod tests { use rand::rng; use rand::seq::SliceRandom; - use weaver_semconv::manifest::SchemaUrl; use std::cmp::Ordering; use std::collections::HashMap; use std::error::Error; use std::fs::OpenOptions; use std::path::PathBuf; + use weaver_semconv::manifest::SchemaUrl; use glob::glob; use serde::Serialize; @@ -919,13 +919,12 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let schema_url = Some(SchemaUrl("https://default/0.1.0".to_owned())); + let schema_url = Some(SchemaUrl::new("https://default/0.1.0".to_owned())); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(schema_url, &location) - .expect("Failed to load registry"), + RegistryRepo::try_new(schema_url, &location).expect("Failed to load registry"), true, ) .ignore(|e| { @@ -1114,7 +1113,7 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let schema_url = Some(SchemaUrl(format!("https://local/registry/1.0.0"))); + let schema_url = Some(SchemaUrl::new("https://local/registry/1.0.0".to_owned())); let repo = RegistryRepo::try_new(schema_url, &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 70a2a1ace..8525af99f 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -544,7 +544,7 @@ mod 
tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index f2a72eed9..65d66903d 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -14,18 +14,43 @@ use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::path::PathBuf; +use std::sync::OnceLock; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; /// Represents the schema URL of a registry, which serves as a unique identifier for the registry /// along with its version. -#[derive(Debug, Clone, PartialEq, Eq, Hash, JsonSchema)] -pub struct SchemaUrl(pub String); +#[derive(Debug, Clone, JsonSchema)] +pub struct SchemaUrl { + /// The schema URL string. + pub url: String, + #[serde(skip)] + #[schemars(skip)] + name: OnceLock, + #[serde(skip)] + #[schemars(skip)] + version: OnceLock, +} impl SchemaUrl { + /// Create a new SchemaUrl from a string. + #[must_use] + pub fn new(url: String) -> Self { + Self { + url, + name: OnceLock::new(), + version: OnceLock::new(), + } + } + + /// Get the URL as a string. + pub fn as_str(&self) -> &str { + &self.url + } + /// Validate the schema URL format. 
pub fn validate(&self) -> Result<(), String> { - let parsed = url::Url::parse(&self.0).map_err(|e| format!("Invalid schema URL: {e}"))?; + let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { return Err("The schema URL must have at least one path segment.".to_owned()); } @@ -33,51 +58,72 @@ impl SchemaUrl { } /// Returns the registry name, derived from the schema URL. - pub fn name(&self) -> String { - let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } + #[must_use] + pub fn name(&self) -> &str { + self.name.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } - if segments.is_empty() { - return parsed_url.authority().to_string(); - } + if segments.is_empty() { + return parsed_url.authority().to_owned(); + } - format!("{}/{}", parsed_url.authority(), segments.join("/")) + format!("{}/{}", parsed_url.authority(), segments.join("/")) + }) } /// Returns the registry version, derived from the schema URL. - pub fn version(&self) -> String { - let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") - .to_string() + #[must_use] + pub fn version(&self) -> &str { + self.version.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_owned() + }) } /// Create a SchemaUrl from name and version. 
pub fn from_name_version(name: &str, version: &str) -> Result { - let schema_url_str; // TODO: replace with scheme regex - if name.starts_with("http://") || name.starts_with("https://") { - schema_url_str = format!("{}/{}", name.trim_end_matches('/'), version); - } else { - schema_url_str = format!("https://{}/{}", name.trim_end_matches('/'), version); - } - let schema_url = SchemaUrl(schema_url_str); + let schema_url = SchemaUrl::new( + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version) + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version) + }, + ); + schema_url.validate()?; Ok(schema_url) } } +impl PartialEq for SchemaUrl { + fn eq(&self, other: &Self) -> bool { + self.url == other.url + } +} + +impl Eq for SchemaUrl {} + +impl std::hash::Hash for SchemaUrl { + fn hash(&self, state: &mut H) { + self.url.hash(state); + } +} + impl std::fmt::Display for SchemaUrl { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) + write!(f, "{}", self.url) } } @@ -87,7 +133,7 @@ impl<'de> Deserialize<'de> for SchemaUrl { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - Ok(SchemaUrl(s)) + Ok(SchemaUrl::new(s)) } } @@ -96,7 +142,7 @@ impl Serialize for SchemaUrl { where S: Serializer, { - serializer.serialize_str(&self.0) + serializer.serialize_str(&self.url) } } @@ -203,10 +249,10 @@ impl<'de> Deserialize<'de> for Dependency { let schema_url = match (helper.schema_url, helper.name) { (Some(url), _) => url, - (None, Some(name)) => SchemaUrl(format!("{}/unknown", name)), + (None, Some(name)) => SchemaUrl::new(format!("{}/unknown", name)), (None, None) => { return Err(serde::de::Error::custom( - "Either 'schema_url' or 'name' must be provided for a dependency" + "Either 'schema_url' or 'name' must be provided for a dependency", )) } }; @@ -282,13 +328,13 @@ impl RegistryManifest { path: path.clone(), error: "The registry schema 
base URL is required.".to_owned(), }); - } else { - if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: format!("Invalid schema base URL: {}", e), - }); - } + } else if let Err(e) = + url::Url::parse(&self.schema_base_url.clone().unwrap_or_default()) + { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema base URL: {}", e), + }); } if self @@ -321,23 +367,23 @@ impl RegistryManifest { /// Returns the registry name, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` + #[must_use] pub fn name(&self) -> String { self.schema_url .as_ref() - .map(|url| url.name()) + .map(|url| url.name().to_owned()) .unwrap_or_default() - .to_string() } /// Returns the registry version, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry version would be `1.0.0` + #[must_use] pub fn version(&self) -> String { self.schema_url .as_ref() - .map(|url| url.version()) + .map(|url| url.version().to_owned()) .unwrap_or_default() - .to_string() } } diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 2afbfb507..3fd760d4b 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -5,7 +5,7 @@ use crate::attribute::AttributeSpecWithProvenance; use crate::group::{GroupSpecWithProvenance, ImportsWithProvenance}; use crate::json_schema::JsonSchemaValidator; -use crate::manifest::RegistryManifest; +use crate::manifest::{RegistryManifest, SchemaUrl}; use crate::provenance::Provenance; use crate::registry_repo::RegistryRepo; use crate::semconv::{SemConvSpecV1WithProvenance, SemConvSpecWithProvenance}; @@ -143,22 +143,21 @@ impl SemConvRegistry 
{ } } + let schema_url = SchemaUrl::from_name_version(®istry_repo.name(), &semconv_version) + .map_err(|e| Error::InvalidRegistryManifest { + path: registry_repo.registry_path_repr().into(), + error: e.clone(), + })?; + registry.set_manifest(RegistryManifest { file_format: None, - schema_url: registry_repo - .manifest() - .and_then(|m| Some(m.schema_url.clone())) - .unwrap_or_default(), - schema_base_url: registry_repo - .manifest() - .and_then(|m| m.schema_base_url.clone()), - semconv_version: registry_repo - .manifest() - .and_then(|m| m.semconv_version.clone()), + schema_url: Some(schema_url), description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], resolved_schema_uri: None, stability: crate::stability::Stability::Development, + semconv_version: None, + schema_base_url: None, }); } else { registry.manifest = registry_repo.manifest().cloned(); @@ -394,8 +393,11 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = - RegistryRepo::try_new(Some(SchemaUrl("https://test/42".to_owned())), ®istry_path).unwrap(); + let registry_repo = RegistryRepo::try_new( + Some(SchemaUrl::new("https://test/42".to_owned())), + ®istry_path, + ) + .unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 2a5de63fe..e07628534 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -42,16 +42,14 @@ pub struct RegistryRepo { impl RegistryRepo { /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. 
- pub fn try_new_dependency( - dependency: &Dependency, - ) -> Result { + pub fn try_new_dependency(dependency: &Dependency) -> Result { let path = dependency.registry_path.clone().unwrap_or_else(|| { - // If no registry path is provided, we assume it's the same as the parent registry. - VirtualDirectoryPath::RemoteArchive { - url: dependency.schema_url.to_string(), - sub_folder: None, - } - }); + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: dependency.schema_url.to_string(), + sub_folder: None, + } + }); Self::try_new(Some(dependency.schema_url.clone()), &path) } @@ -65,9 +63,6 @@ impl RegistryRepo { ) -> Result { let registry = VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; - let mut manifest = None; - let mut registry_name = None; - let mut registry_version = None; // Try to load manifest if let Some(manifest_path) = { // We need a temporary RegistryRepo to call manifest_path @@ -80,20 +75,29 @@ impl RegistryRepo { temp_repo.manifest_path() } { let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; - registry_name = Some(registry_manifest.name()); - registry_version = Some(registry_manifest.version()); - manifest = Some(registry_manifest); + Ok(Self { + name: registry_manifest.name().into(), + version: registry_manifest.version().into(), + registry, + manifest: Some(registry_manifest), + }) } else { - // No manifest, require name and version - registry_name = schema_url.as_ref().map(|url| url.name()).or(Some("unknown".to_owned())); - registry_version = schema_url.as_ref().map(|url| url.version()).or(Some("unknown".to_owned())); + // No manifest + Ok(Self { + name: Arc::from( + schema_url + .as_ref() + .map_or("unknown".to_owned(), |url| url.name().to_owned()), + ), + version: Arc::from( + schema_url + .as_ref() + .map_or("unknown".to_owned(), |url| url.version().to_owned()), + ), + registry, + manifest: None, + }) } - Ok(Self 
{ - name: registry_name.unwrap().into(), - version: registry_version.unwrap().into(), - registry, - manifest, - }) } /// Returns the registry name (from manifest if present, otherwise top-level field). @@ -242,8 +246,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; @@ -264,8 +268,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", @@ -278,8 +282,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 47ea0369c..586080a7a 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -452,7 +452,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: 
SchemaUrl("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -568,7 +568,7 @@ mod tests { common: CommonFields::default(), }, }], - } + }, } } } diff --git a/src/registry/diff.rs b/src/registry/diff.rs index d013baff9..ab12ac72f 100644 --- a/src/registry/diff.rs +++ b/src/registry/diff.rs @@ -101,12 +101,8 @@ pub(crate) fn command(args: &RegistryDiffArgs) -> Result Result Result WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = - RegistryRepo::try_new(None, registry_path)?; + let main_registry_repo = RegistryRepo::try_new(None, registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 050472f81..3d7c009d5 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -34,12 +34,10 @@ fn test_cli_interface() { refspec: None, }; - let schema_url = Some(SchemaUrl("https://opelemetry.io/schemas/1.40.0".to_owned())); - let registry_repo = - RegistryRepo::try_new(schema_url, ®istry_path) - .unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + let schema_url = Some(SchemaUrl::new("https://opelemetry.io/schemas/1.40.0".to_owned())); + let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() From 4ee4cd5d1c8d0b04b077d0e3aef934d7b491ade2 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 13:46:51 -0800 Subject: [PATCH 05/27] cleanup --- 
crates/weaver_resolved_schema/src/error.rs | 13 ++++- crates/weaver_resolved_schema/src/lib.rs | 53 +++++++++++---------- crates/weaver_resolved_schema/src/v2/mod.rs | 29 +++++++---- crates/weaver_resolver/src/dependency.rs | 6 +-- crates/weaver_resolver/src/lib.rs | 5 +- crates/weaver_resolver/src/loader.rs | 4 +- 6 files changed, 66 insertions(+), 44 deletions(-) diff --git a/crates/weaver_resolved_schema/src/error.rs b/crates/weaver_resolved_schema/src/error.rs index 99a1f3750..2986e22e7 100644 --- a/crates/weaver_resolved_schema/src/error.rs +++ b/crates/weaver_resolved_schema/src/error.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::attribute::AttributeRef; -use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound}; +use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound, InvalidSchemaUrl}; /// Errors emitted by this crate. #[derive(thiserror::Error, Debug, Clone, Deserialize, Serialize)] @@ -26,6 +26,16 @@ pub enum Error { group_id: String, }, + /// Cannot convert from V1 to V2 schema due to invalid schema URL. + #[error("Failed to convert from V1 to V2 schema, invalid schema URL: {url}, error: {error}")] + InvalidSchemaUrl { + /// The invalid schema URL. + url: String, + + /// The error message from the URL validation. + error: String, + }, + /// A generic container for multiple errors. #[error("Errors:\n{0:#?}")] CompoundError(Vec), @@ -53,6 +63,7 @@ impl Error { CompoundError(errors) => errors, e @ AttributeNotFound { .. } => vec![e], e @ EventNameNotFound { .. } => vec![e], + e @ InvalidSchemaUrl { .. 
} => vec![e], }) .collect(), ) diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index f16385777..dc798ff14 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::GroupType; -use weaver_semconv::manifest::{RegistryManifest, SchemaUrl}; +use weaver_semconv::manifest::RegistryManifest; use weaver_version::schema_changes::{SchemaChanges, SchemaItemChange, SchemaItemType}; use weaver_version::Versions; @@ -51,8 +51,10 @@ pub(crate) const V2_RESOLVED_FILE_FORMAT: &str = "resolved/2.0.0"; pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, - /// Schema URL that this file is or will be published at. - pub schema_url: SchemaUrl, + /// Schema URL that this file is published at. + pub schema_url: String, + /// The ID of the registry that this schema belongs to. + pub registry_id: String, /// The registry that this schema belongs to. pub registry: Registry, /// Catalog of unique items that are shared across multiple registries @@ -77,8 +79,7 @@ pub struct ResolvedTelemetrySchema { #[serde(skip_serializing_if = "Option::is_none")] pub versions: Option, /// The manifest of the registry. - #[serde(skip)] - pub manifest: Option, + pub registry_manifest: Option, } /// Statistics on a resolved telemetry schema. @@ -93,18 +94,18 @@ pub struct Stats { impl ResolvedTelemetrySchema { /// Create a new resolved telemetry schema. - pub fn new>(schema_url: S, registry_url: S) -> Self { + pub fn new>(schema_url: S, registry_id: S, registry_url: S) -> Self { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - // TODO: is it correct? 
- schema_url: SchemaUrl::new(schema_url.as_ref().to_owned()), + schema_url: schema_url.as_ref().to_owned(), + registry_id: registry_id.as_ref().to_owned(), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, instrumentation_library: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, } } @@ -295,13 +296,13 @@ impl ResolvedTelemetrySchema { pub fn diff(&self, baseline_schema: &ResolvedTelemetrySchema) -> SchemaChanges { let mut changes = SchemaChanges::new(); - if let Some(ref manifest) = self.manifest { + if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); } - if let Some(ref manifest) = baseline_schema.manifest { + if let Some(ref manifest) = baseline_schema.registry_manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); @@ -538,7 +539,7 @@ mod tests { #[test] fn no_diff() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "group1", [ @@ -555,7 +556,7 @@ mod tests { #[test] fn detect_2_added_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -564,7 +565,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -583,7 +584,7 @@ mod tests { #[test] fn detect_2_deprecated_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); 
prior_schema.add_attribute_group( "registry.group1", [ @@ -597,7 +598,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -644,7 +645,7 @@ mod tests { #[test] fn detect_2_renamed_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -658,7 +659,7 @@ mod tests { // 2 new attributes are added: attr2_bis and attr3_bis // attr2 is renamed attr2_bis // attr3 is renamed attr3_bis - let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -692,7 +693,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_existing_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -704,7 +705,7 @@ mod tests { ); prior_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -731,7 +732,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_new_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -742,7 +743,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + 
let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -775,7 +776,7 @@ mod tests { /// However, detecting this case is useful for identifying a violation of the process. #[test] fn detect_2_removed_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -786,7 +787,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -804,9 +805,9 @@ mod tests { // TODO add many more group diff checks for various capabilities. #[test] fn detect_metric_name_change() { - let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_metric_group("metrics.cpu.time", "cpu.time", [], None); - let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_metric_group( "metrics.cpu.time", "cpu.time", diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index e4a83d845..c3bcf416f 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -123,13 +123,21 @@ impl TryFrom for ResolvedTelemetrySchema { fn try_from(value: crate::ResolvedTelemetrySchema) -> Result { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; - Ok(ResolvedTelemetrySchema { - file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: value.schema_url, - attribute_catalog, - registry, - refinements, - }) + let schema_url = 
SchemaUrl::new(value.schema_url); + + match schema_url.validate() { + Ok(_) => Ok(ResolvedTelemetrySchema { + file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), + schema_url, + attribute_catalog, + registry, + refinements, + }), + Err(e) => Err(crate::error::Error::InvalidSchemaUrl { + url: schema_url.to_string(), + error: e.clone(), + }), + } } } @@ -981,17 +989,18 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), + schema_url: "http://test/schemas/1.0.0".to_owned(), + registry_id: "my-registry".to_owned(), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "http://test/schemas/1.0".to_owned(), + registry_url: "http://test/schemas/1.0.0".to_owned(), groups: vec![], }, instrumentation_library: None, resource: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, }; let v2_schema: Result = v1_schema.try_into(); diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index f6b5bb8da..56f041de8 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -439,7 +439,6 @@ mod tests { use itertools::Itertools; use std::error::Error; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; - use weaver_semconv::manifest::SchemaUrl; use crate::dependency::{ResolvedDependency, UnresolvedAttributeLookup}; @@ -471,7 +470,8 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), + schema_url: "http://test/schemas/1.0.0".to_owned(), + registry_id: "test-registry".to_owned(), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: 
vec![weaver_resolved_schema::registry::Group { @@ -523,7 +523,7 @@ mod tests { instrumentation_library: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, } } } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 97c3b665f..b4a61142a 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -118,14 +118,15 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url, + schema_url: schema_url.url.clone(), + registry_id: schema_url.name().to_owned(), registry: resolved_registry, catalog, resource: None, instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! - manifest, + registry_manifest: manifest, } }) } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index a73843ca3..fd94a17d0 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -79,7 +79,7 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.url, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.url, } } @@ -115,7 +115,7 @@ impl LoadedSemconvRegistry { } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().to_owned()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.to_owned()], LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().to_owned()], } } From afd3cd6fbf965d9cc310d6384ef98c08d0afab95 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 14:24:34 -0800 Subject: [PATCH 06/27] cleanup --- crates/weaver_emit/src/lib.rs | 2 +- crates/weaver_forge/src/lib.rs | 2 +- crates/weaver_forge/src/v2/registry.rs | 2 +- crates/weaver_live_check/src/live_checker.rs | 5 +- crates/weaver_mcp/src/service.rs | 5 +- crates/weaver_resolved_schema/src/v2/mod.rs | 2 +- crates/weaver_resolver/src/dependency.rs | 2 +- crates/weaver_resolver/src/lib.rs | 2 +- crates/weaver_resolver/src/registry.rs | 2 +- crates/weaver_search/src/lib.rs | 2 +- crates/weaver_semconv/src/lib.rs | 1 + crates/weaver_semconv/src/manifest.rs | 256 +++++++------ crates/weaver_semconv/src/registry.rs | 5 +- crates/weaver_semconv/src/registry_repo.rs | 3 +- crates/weaver_semconv/src/schema_url.rs | 356 +++++++++++++++++++ crates/weaver_semconv_gen/src/v2.rs | 2 +- tests/resolution_process.rs | 6 +- 17 files changed, 507 insertions(+), 148 deletions(-) create mode 100644 crates/weaver_semconv/src/schema_url.rs diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index c00c0b070..ca034ac48 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -268,7 +268,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, Examples, PrimitiveOrArrayTypeSpec, RequirementLevel}, group::{GroupType, InstrumentSpec, SpanKindSpec}, - manifest::SchemaUrl, + schema_url::SchemaUrl, 
stability::Stability, }; diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index ba2f59a0d..f0637853a 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -841,8 +841,8 @@ mod tests { use weaver_common::vdir::VirtualDirectoryPath; use weaver_diff::diff_dir; use weaver_resolver::{LoadedSemconvRegistry, SchemaResolver}; - use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::registry_repo::RegistryRepo; + use weaver_semconv::schema_url::SchemaUrl; use crate::config::{ApplicationMode, CaseConvention, Params, TemplateConfig, WeaverConfig}; use crate::debug::print_dedup_errors; diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 2db318af5..d67117dbe 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -3,7 +3,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog}; -use weaver_semconv::manifest::SchemaUrl; +use weaver_semconv::schema_url::SchemaUrl; use crate::{ error::Error, diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index c7f960da5..77e1547fd 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -64,7 +64,8 @@ impl LiveChecker { } } for attribute in &group.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); + let attribute_rc = + Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -195,7 +196,7 @@ mod tests { YamlValue, }; use weaver_semconv::{ - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 9850dbd6d..06afebb22 100644 --- 
a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,7 +58,8 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(®istry)); // Create versioned registry wrapper once for live check - let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); + let versioned_registry = + Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); Self { search_context, @@ -386,7 +387,7 @@ mod tests { use weaver_search::SearchType; use weaver_semconv::attribute::AttributeType; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index c3bcf416f..20b46b682 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 56f041de8..7c6caee8d 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -20,7 +20,7 @@ use crate::{attribute::AttributeCatalog, Error}; pub(crate) enum ResolvedDependency { /// A V1 Dependency V1(Box), - // A V2 Dependency + /// A V2 Dependency V2(Box), } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index b4a61142a..d980dfed1 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,7 +3,7 @@ #![doc = 
include_str!("../README.md")] use weaver_semconv::group::ImportsWithProvenance; -use weaver_semconv::manifest::SchemaUrl; +use weaver_semconv::schema_url::SchemaUrl; use crate::attribute::AttributeCatalog; use crate::dependency::ResolvedDependency; diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 3916363bf..f5f3620cc 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -851,7 +851,7 @@ mod tests { use std::error::Error; use std::fs::OpenOptions; use std::path::PathBuf; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use glob::glob; use serde::Serialize; diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 8525af99f..3c7154782 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -478,7 +478,7 @@ mod tests { use weaver_semconv::attribute::AttributeType; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index df0346e0b..7e312cac2 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -21,6 +21,7 @@ pub mod manifest; pub mod provenance; pub mod registry; pub mod registry_repo; +pub mod schema_url; pub mod semconv; pub mod stability; pub mod stats; diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 65d66903d..d8f268368 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -8,144 +8,16 @@ //! In the future, this struct may be extended to include additional information //! 
such as the registry's owner, maintainers, and dependencies. +use crate::schema_url::SchemaUrl; use crate::stability::Stability; use crate::Error; use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Deserializer, Serialize}; use std::path::PathBuf; -use std::sync::OnceLock; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; -/// Represents the schema URL of a registry, which serves as a unique identifier for the registry -/// along with its version. -#[derive(Debug, Clone, JsonSchema)] -pub struct SchemaUrl { - /// The schema URL string. - pub url: String, - #[serde(skip)] - #[schemars(skip)] - name: OnceLock, - #[serde(skip)] - #[schemars(skip)] - version: OnceLock, -} - -impl SchemaUrl { - /// Create a new SchemaUrl from a string. - #[must_use] - pub fn new(url: String) -> Self { - Self { - url, - name: OnceLock::new(), - version: OnceLock::new(), - } - } - - /// Get the URL as a string. - pub fn as_str(&self) -> &str { - &self.url - } - - /// Validate the schema URL format. - pub fn validate(&self) -> Result<(), String> { - let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; - if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { - return Err("The schema URL must have at least one path segment.".to_owned()); - } - Ok(()) - } - - /// Returns the registry name, derived from the schema URL. 
- #[must_use] - pub fn name(&self) -> &str { - self.name.get_or_init(|| { - let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } - - if segments.is_empty() { - return parsed_url.authority().to_owned(); - } - - format!("{}/{}", parsed_url.authority(), segments.join("/")) - }) - } - - /// Returns the registry version, derived from the schema URL. - #[must_use] - pub fn version(&self) -> &str { - self.version.get_or_init(|| { - let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") - .to_owned() - }) - } - - /// Create a SchemaUrl from name and version. - pub fn from_name_version(name: &str, version: &str) -> Result { - // TODO: replace with scheme regex - let schema_url = SchemaUrl::new( - if name.starts_with("http://") || name.starts_with("https://") { - format!("{}/{}", name.trim_end_matches('/'), version) - } else { - format!("https://{}/{}", name.trim_end_matches('/'), version) - }, - ); - - schema_url.validate()?; - Ok(schema_url) - } -} - -impl PartialEq for SchemaUrl { - fn eq(&self, other: &Self) -> bool { - self.url == other.url - } -} - -impl Eq for SchemaUrl {} - -impl std::hash::Hash for SchemaUrl { - fn hash(&self, state: &mut H) { - self.url.hash(state); - } -} - -impl std::fmt::Display for SchemaUrl { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.url) - } -} - -impl<'de> Deserialize<'de> for SchemaUrl { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - Ok(SchemaUrl::new(s)) - } -} - -impl Serialize for SchemaUrl { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&self.url) - } -} 
- /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema @@ -443,4 +315,128 @@ mod tests { panic!("Expected an error, but got a result."); } } + + // Dependency tests + #[test] + fn test_dependency_deserialize_with_schema_url() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_none()); + } + + #[test] + fn test_dependency_deserialize_with_registry_path() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +registry_path: "./registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_some()); + } + + #[test] + fn test_dependency_deserialize_with_deprecated_name() { + let yaml = r#" +name: "acme-registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!(dep.schema_url.as_str(), "acme-registry/unknown"); + } + + #[test] + fn test_dependency_deserialize_schema_url_takes_precedence() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +name: "ignored-name" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_dependency_deserialize_missing_both_fields() { + let yaml = r#" +registry_path: "./registry" +"#; + let result: Result = serde_yaml::from_str(yaml); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err + .to_string() + .contains("Either 'schema_url' or 'name' must be provided")); + } + + #[test] + fn 
test_dependency_serialize() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: None, + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // Verify schema_url is serialized + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("https://opentelemetry.io/schemas/1.0.0")); + // Verify name is NOT serialized (skip_serializing) + assert!(!yaml.contains("name:")); + } + + #[test] + fn test_dependency_serialize_with_registry_path() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./registry".to_owned(), + }), + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_serialize_without_optional_path() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: None, + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // registry_path should not be serialized when None (skip_serializing_if) + assert!(!yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_roundtrip_serialization() { + let original = Dependency { + schema_url: SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./test/registry".to_owned(), + }), + name: None, + }; + + let yaml = serde_yaml::to_string(&original).expect("Failed to serialize"); + let deserialized: Dependency = serde_yaml::from_str(&yaml).expect("Failed to deserialize"); + + assert_eq!(original.schema_url, deserialized.schema_url); + assert!(deserialized.registry_path.is_some()); + } } diff --git a/crates/weaver_semconv/src/registry.rs 
b/crates/weaver_semconv/src/registry.rs index 3fd760d4b..1fe8ee1c6 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -5,9 +5,10 @@ use crate::attribute::AttributeSpecWithProvenance; use crate::group::{GroupSpecWithProvenance, ImportsWithProvenance}; use crate::json_schema::JsonSchemaValidator; -use crate::manifest::{RegistryManifest, SchemaUrl}; +use crate::manifest::RegistryManifest; use crate::provenance::Provenance; use crate::registry_repo::RegistryRepo; +use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpecV1WithProvenance, SemConvSpecWithProvenance}; use crate::stats::Stats; use crate::Error; @@ -285,10 +286,10 @@ impl SemConvRegistry { mod tests { use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec}; use crate::group::{GroupSpec, GroupType}; - use crate::manifest::SchemaUrl; use crate::provenance::Provenance; use crate::registry::SemConvRegistry; use crate::registry_repo::RegistryRepo; + use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpec, SemConvSpecV1, SemConvSpecWithProvenance}; use crate::Error; diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index e07628534..3a05732b4 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -6,7 +6,8 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::{Dependency, RegistryManifest, SchemaUrl}; +use crate::manifest::{Dependency, RegistryManifest}; +use crate::schema_url::SchemaUrl; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs new file mode 100644 index 000000000..2aceb7eab --- /dev/null +++ b/crates/weaver_semconv/src/schema_url.rs @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: 
Apache-2.0 + +//! Schema URL type for uniquely identifying semantic convention registries. + +use schemars::JsonSchema; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::sync::OnceLock; + +/// Represents the schema URL of a registry, which serves as a unique identifier for the registry +/// along with its version. +#[derive(Debug, Clone, JsonSchema)] +pub struct SchemaUrl { + /// The schema URL string. + pub url: String, + #[serde(skip)] + #[schemars(skip)] + name: OnceLock, + #[serde(skip)] + #[schemars(skip)] + version: OnceLock, +} + +impl SchemaUrl { + /// Create a new SchemaUrl from a string. + #[must_use] + pub fn new(url: String) -> Self { + Self { + url, + name: OnceLock::new(), + version: OnceLock::new(), + } + } + + /// Get the URL as a string. + pub fn as_str(&self) -> &str { + &self.url + } + + /// Validate the schema URL format. + pub fn validate(&self) -> Result<(), String> { + let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; + let has_path = parsed + .path_segments() + .map(|segments| segments.filter(|s| !s.is_empty()).count() > 0) + .unwrap_or(false); + + if !has_path { + return Err("The schema URL must have at least one path segment.".to_owned()); + } + Ok(()) + } + + /// Returns the registry name, derived from the schema URL. + #[must_use] + pub fn name(&self) -> &str { + self.name.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + + if segments.is_empty() { + return parsed_url.authority().to_owned(); + } + + format!("{}/{}", parsed_url.authority(), segments.join("/")) + }) + } + + /// Returns the registry version, derived from the schema URL. 
+ #[must_use] + pub fn version(&self) -> &str { + self.version.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_owned() + }) + } + + /// Create a SchemaUrl from name and version. + pub fn from_name_version(name: &str, version: &str) -> Result { + // TODO: replace with scheme regex + let schema_url = SchemaUrl::new( + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version) + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version) + }, + ); + + schema_url.validate()?; + Ok(schema_url) + } +} + +impl PartialEq for SchemaUrl { + fn eq(&self, other: &Self) -> bool { + self.url == other.url + } +} + +impl Eq for SchemaUrl {} + +impl std::hash::Hash for SchemaUrl { + fn hash(&self, state: &mut H) { + self.url.hash(state); + } +} + +impl std::fmt::Display for SchemaUrl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.url) + } +} + +impl<'de> Deserialize<'de> for SchemaUrl { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Ok(SchemaUrl::new(s)) + } +} + +impl Serialize for SchemaUrl { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.url) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_and_as_str() { + let url = "https://opentelemetry.io/schemas/1.0.0"; + let schema_url = SchemaUrl::new(url.to_owned()); + assert_eq!(schema_url.as_str(), url); + } + + #[test] + fn test_validate_valid_url() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert!(schema_url.validate().is_ok()); + } + + #[test] + fn test_validate_invalid_url_syntax() { + let schema_url = SchemaUrl::new("not a valid url".to_owned()); + 
assert!(schema_url.validate().is_err()); + } + + #[test] + fn test_validate_url_without_path() { + let schema_url = SchemaUrl::new("https://opentelemetry.io".to_owned()); + let result = schema_url.validate(); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("at least one path segment")); + } + + #[test] + fn test_name_extraction_simple() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); + } + + #[test] + fn test_name_extraction_nested_path() { + let schema_url = + SchemaUrl::new("https://opentelemetry.io/schemas/sub-component/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); + } + + #[test] + fn test_name_extraction_single_segment() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io"); + } + + #[test] + fn test_name_extraction_with_port() { + let schema_url = SchemaUrl::new("https://example.com:8080/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "example.com:8080/schemas"); + } + + #[test] + fn test_version_extraction_simple() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.version(), "1.0.0"); + } + + #[test] + fn test_version_extraction_semantic_version() { + let schema_url = SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()); + assert_eq!(schema_url.version(), "1.2.3"); + } + + #[test] + fn test_version_extraction_single_segment() { + let schema_url = SchemaUrl::new("https://example.com/v1".to_owned()); + assert_eq!(schema_url.version(), "v1"); + } + + #[test] + fn test_from_name_version_with_https() { + let result = SchemaUrl::from_name_version("https://opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + 
); + } + + #[test] + fn test_from_name_version_without_scheme() { + let result = SchemaUrl::from_name_version("opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_from_name_version_with_http() { + let result = SchemaUrl::from_name_version("http://example.com/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "http://example.com/schemas/1.0.0"); + } + + #[test] + fn test_from_name_version_with_trailing_slash() { + let result = SchemaUrl::from_name_version("https://example.com/schemas/", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_equality() { + let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url2 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url3 = SchemaUrl::new("https://example.com/schemas/2.0.0".to_owned()); + + assert_eq!(url1, url2); + assert_ne!(url1, url3); + } + + #[test] + fn test_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url2 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + + let mut hasher1 = DefaultHasher::new(); + url1.hash(&mut hasher1); + let hash1 = hasher1.finish(); + + let mut hasher2 = DefaultHasher::new(); + url2.hash(&mut hasher2); + let hash2 = hasher2.finish(); + + assert_eq!(hash1, hash2); + } + + #[test] + fn test_display() { + let schema_url = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + assert_eq!( + format!("{}", schema_url), + "https://example.com/schemas/1.0.0" + ); + } + + #[test] + fn test_serialize() { + let schema_url = 
SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let json = serde_json::to_string(&schema_url).unwrap(); + assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); + } + + #[test] + fn test_deserialize() { + let json = "\"https://example.com/schemas/1.0.0\""; + let schema_url: SchemaUrl = serde_json::from_str(json).unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_serialize_deserialize_roundtrip() { + let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let json = serde_json::to_string(&original).unwrap(); + let deserialized: SchemaUrl = serde_json::from_str(&json).unwrap(); + assert_eq!(original, deserialized); + } + + #[test] + fn test_name_caching() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Call name() twice and verify they return the same reference + let name1 = schema_url.name(); + let name2 = schema_url.name(); + + assert_eq!(name1, name2); + assert_eq!(name1, "opentelemetry.io/schemas"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(name1.as_ptr(), name2.as_ptr()); + } + + #[test] + fn test_version_caching() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Call version() twice and verify they return the same reference + let version1 = schema_url.version(); + let version2 = schema_url.version(); + + assert_eq!(version1, version2); + assert_eq!(version1, "1.0.0"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(version1.as_ptr(), version2.as_ptr()); + } + + #[test] + fn test_clone_preserves_url_but_resets_cache() { + let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Access name to populate cache + let _ = original.name(); + + // Clone should have the same URL but empty cache + let cloned = original.clone(); + assert_eq!(original.as_str(), cloned.as_str()); + 
assert_eq!(original.name(), cloned.name()); + } +} diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 586080a7a..7393b10ea 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -413,7 +413,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, PrimitiveOrArrayTypeSpec}, group::InstrumentSpec, - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 3d7c009d5..f428c3280 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -6,7 +6,7 @@ use miette::Diagnostic; use weaver_common::vdir::VirtualDirectoryPath; use weaver_resolver::SchemaResolver; -use weaver_semconv::{manifest::SchemaUrl, registry_repo::RegistryRepo}; +use weaver_semconv::{registry_repo::RegistryRepo, schema_url::SchemaUrl}; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -34,7 +34,9 @@ fn test_cli_interface() { refspec: None, }; - let schema_url = Some(SchemaUrl::new("https://opelemetry.io/schemas/1.40.0".to_owned())); + let schema_url = Some(SchemaUrl::new( + "https://opelemetry.io/schemas/1.40.0".to_owned(), + )); let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From 4d8b90f17597f6268a4471e03b47e832ea9260f5 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Fri, 13 Feb 2026 13:46:41 -0800 Subject: [PATCH 07/27] checkpoint: manifest renames --- Cargo.lock | 1 + crates/weaver_resolved_schema/src/lib.rs | 4 +- .../published/registry_manifest.yaml | 8 +- crates/weaver_resolver/src/loader.rs | 7 +- crates/weaver_semconv/Cargo.toml | 1 + crates/weaver_semconv/src/manifest.rs | 128 +++++++++++++----- crates/weaver_semconv/src/registry.rs | 10 +- 
crates/weaver_semconv/src/registry_repo.rs | 50 +++++-- .../3.0.0/registry_manifest.yaml | 2 +- .../tests/published_repository/resolved/1.0.0 | 2 +- .../tests/published_repository/resolved/2.0.0 | 2 +- 11 files changed, 151 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b7094cba..11ff91ae9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5894,6 +5894,7 @@ dependencies = [ "serde_yaml", "thiserror 2.0.18", "ureq", + "url", "utoipa", "walkdir", "weaver_common", diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index cde4f4947..bc7e371a4 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -298,13 +298,13 @@ impl ResolvedTelemetrySchema { if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().clone(), }); } if let Some(ref manifest) = baseline_schema.registry_manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().clone(), }); } diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml index f85d5ef60..6d7cb03d2 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml @@ -1,7 +1,5 @@ file_format: manifest/2.0.0 -name: resolved -description: Test repository that has been resolved. 
-version: 1.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +schema_url: https://opentelemetry.io/schemas/1.0.0 +resolved_schema_uri: resolved_schema.yaml +description: Test repository that has been resolved. \ No newline at end of file diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index b00634e54..7d1e8af29 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -14,7 +14,7 @@ use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; use weaver_semconv::json_schema::JsonSchemaValidator; -use weaver_semconv::registry_repo::{RegistryRepo, REGISTRY_MANIFEST}; +use weaver_semconv::registry_repo::{LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST, RegistryRepo}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; use crate::Error; @@ -192,7 +192,7 @@ fn load_semconv_repository_recursive( // Either load a fully resolved repository, or read in raw files. 
if let Some(manifest) = registry_repo.manifest() { - if let Some(resolved_url) = registry_repo.resolved_schema_url() { + if let Some(resolved_url) = registry_repo.resolved_schema_uri() { load_resolved_repository(&resolved_url) } else { if manifest.dependencies.len() > 1 { @@ -281,6 +281,7 @@ fn load_definition_repository( && (extension == "yaml" || extension == "yml") && file_name != "schema-next.yaml" && file_name != REGISTRY_MANIFEST + && file_name != LEGACY_REGISTRY_MANIFEST } let local_path = registry_repo.path().to_path_buf(); let registry_path_repr = registry_repo.registry_path_repr(); @@ -481,7 +482,7 @@ mod tests { WResult::FatalErr(fatal) => { let error_msg = fatal.to_string(); assert!( - error_msg.contains("Circular dependency detected") && + error_msg.contains("Circular dependency detected") && error_msg.contains("registry_a") && error_msg.contains("registry_b"), "Expected circular dependency error mentioning both registries, got: {error_msg}" diff --git a/crates/weaver_semconv/Cargo.toml b/crates/weaver_semconv/Cargo.toml index 50cff5ad9..f4996ab66 100644 --- a/crates/weaver_semconv/Cargo.toml +++ b/crates/weaver_semconv/Cargo.toml @@ -29,6 +29,7 @@ regex.workspace = true globset.workspace = true itertools.workspace = true log.workspace = true +url.workspace = true glob = "0.3.3" jsonschema = "0.40.0" # JSON Schema validation used to enhance error messages diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 3619cd073..548d54cf9 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -25,12 +25,16 @@ use weaver_common::vdir::VirtualDirectoryPath; pub struct RegistryManifest { /// The file format for this registry. /// - /// No value is assumed to be `definition/1.0.0` + /// No value is assumed to be `manifest/2.0.0` #[serde(skip_serializing_if = "Option::is_none", default)] pub file_format: Option, - /// The name of the registry. 
This name is used to define the package name. - pub name: String, + /// The schema URL for this registry. + /// This URL is populated before registry is published and is used as + /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: + /// `http[s]://server[:port]/path/`. + /// See https://github.com/open-telemetry/opentelemetry-specification/blob/v1.53.0/specification/schemas/README.md#schema-url for more details. + pub schema_url: Option, /// An optional description of the registry. /// @@ -41,12 +45,18 @@ pub struct RegistryManifest { pub description: Option, /// The version of the registry which will be used to define the semconv package version. - #[serde(alias = "semconv_version")] - pub version: String, + #[serde(skip_serializing_if = "Option::is_none", default)] + #[deprecated( + note = "The `version` field is deprecated. The registry version should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." + )] + pub semconv_version: Option, /// The base URL where the registry's schema files are hosted. - #[serde(alias = "schema_base_url")] - pub repository_url: String, + #[serde(skip_serializing_if = "Option::is_none", default)] + #[deprecated( + note = "The `schema_base_url` field is deprecated. The registry schema URL should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." + )] + pub schema_base_url: Option, /// List of the registry's dependencies. /// Note: In the current phase, we only support zero or one dependency. @@ -60,7 +70,7 @@ pub struct RegistryManifest { /// The location of the resolved telemetry schema, if available. #[serde(skip_serializing_if = "Option::is_none")] - pub resolved_schema_url: Option, + pub resolved_schema_uri: Option, } /// Represents a dependency of a semantic convention registry. 
@@ -94,7 +104,7 @@ impl RegistryManifest { error: e.to_string(), })?; let reader = std::io::BufReader::new(file); - let manifest: RegistryManifest = + let mut manifest: RegistryManifest = serde_yaml::from_reader(reader).map_err(|e| InvalidRegistryManifest { path: manifest_path_buf.clone(), error: e.to_string(), @@ -102,37 +112,94 @@ impl RegistryManifest { manifest.validate(manifest_path_buf.clone())?; + // If the schema URL is not provided, populate it using deprecated schema_base_url and semconv_version + // validation would fail if they were not provided + if manifest.schema_url.is_none() { + manifest.schema_url = Some(format!( + "{}/{}", + manifest.schema_base_url.clone().unwrap_or_default(), + manifest.semconv_version.clone().unwrap_or_default() + )); + } + Ok(manifest) } fn validate(&self, path: PathBuf) -> Result<(), Error> { let mut errors = vec![]; - if self.name.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry name is required.".to_owned(), - }); - } - - if self.version.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }); - } + let schema_url_empty = self.schema_url.as_ref().map_or(true, |url| url.is_empty()); + let schema_base_url_empty = self.schema_base_url.as_ref().map_or(true, |url| url.is_empty()); + let semconv_version_empty = self.semconv_version.as_ref().map_or(true, |v| v.is_empty()); - if self.repository_url.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }); + if schema_url_empty { + if schema_base_url_empty || semconv_version_empty { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry schema URL is required.".to_owned(), + }); + } else { + // schema_base_url should be a valid absolute URL, otherwise push an error to the list. 
+ if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema base URL: {}", e), + }); + } + } + } else { + // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment + match url::Url::parse(self.schema_url.as_ref().unwrap()) { + Ok(parsed_url) => { + if parsed_url.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: "The registry schema URL must have at least one path segment.".to_owned(), + }); + } + } + Err(e) => { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema URL: {}", e), + }); + } + } } handle_errors(errors)?; - Ok(()) } + + /// Returns the registry name, which is derived from the schema URL. + /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry name would be `opentelemetry.io/schemas/sub-component` + pub fn name(&self) -> String { + let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); + let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); + let authority = parsed_url.host_str().unwrap_or_default(); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + format!("{}/{}", authority, segments.join("/")) + } + + /// Returns the registry version, which is derived from the schema URL. 
+ /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry version would be `1.0.0` + pub fn version(&self) -> String { + let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); + let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } } #[cfg(test)] @@ -163,9 +230,8 @@ mod tests { let config = RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") .expect("Failed to load the registry configuration file."); - assert_eq!(config.name, "vendor_acme"); - assert_eq!(config.version, "0.1.0"); - assert_eq!(config.repository_url, "https://acme.com/schemas/"); + assert_eq!(config.name(), "vendor_acme"); + assert_eq!(config.version(), "0.1.0"); } #[test] diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 3ee47d960..7e33fa5b2 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -145,12 +145,12 @@ impl SemConvRegistry { registry.set_manifest(RegistryManifest { file_format: None, - name: registry_repo.id().as_ref().to_owned(), - description: None, - version: semconv_version, - repository_url: "".to_owned(), + schema_url: registry_repo.manifest().and_then(|m| Some(m.schema_url.clone())).unwrap_or_default(), + schema_base_url: registry_repo.manifest().and_then(|m| m.schema_base_url.clone()), + semconv_version: registry_repo.manifest().and_then(|m| m.semconv_version.clone()), + description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], - resolved_schema_url: None, + resolved_schema_uri: None, stability: crate::stability::Stability::Development, }); } else { diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 22c7c0eb0..c91f89d57 100644 --- 
a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -9,10 +9,16 @@ use std::sync::Arc; use crate::manifest::RegistryManifest; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; -use weaver_common::{get_path_type, log_info}; +use weaver_common::{get_path_type, log_info, log_warn}; + +/// The name of the legacy registry manifest file. +#[deprecated( + note = "The registry manifest file is renamed to `manifest.yaml`." +)] +pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// The name of the registry manifest file. -pub const REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; +pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; /// A semantic convention registry repository that can be: /// - A definition repository, which is one of: @@ -48,7 +54,7 @@ impl RegistryRepo { }; if let Some(manifest) = registry_repo.manifest_path() { let registry_manifest = RegistryManifest::try_from_file(manifest)?; - registry_repo.id = Arc::from(registry_manifest.name.as_str()); + registry_repo.id = Arc::from(registry_manifest.name().as_str()); registry_repo.manifest = Some(registry_manifest); } Ok(registry_repo) @@ -78,27 +84,27 @@ impl RegistryRepo { self.manifest.as_ref() } - /// Returns the resolved schema URL, if available in the manifest. + /// Returns the resolved schema URI, if available in the manifest. #[must_use] - pub fn resolved_schema_url(&self) -> Option { + pub fn resolved_schema_uri(&self) -> Option { let manifest = self.manifest.as_ref()?; - let resolved_url: &str = manifest.resolved_schema_url.as_ref()?; - match get_path_type(resolved_url) { + let resolved_uri: &str = manifest.resolved_schema_uri.as_ref()?; + match get_path_type(resolved_uri) { weaver_common::PathType::RelativePath => { - // We need to understand if the manifest URL is the same as the registry URL. + // We need to understand if the manifest URI is the same as the registry URI. 
let vdir_was_manifest_file = self.manifest_path()? == self.registry.path(); Some(self.registry.vdir_path().map_sub_folder(|path| { if vdir_was_manifest_file { match Path::new(&path).parent() { - Some(parent) => format!("{}/{resolved_url}", parent.display()), + Some(parent) => format!("{}/{resolved_uri}", parent.display()), None => "".to_owned(), } } else { - format!("{path}/{resolved_url}") + format!("{path}/{resolved_uri}") } })) } - _ => resolved_url.try_into().ok(), + _ => resolved_uri.try_into().ok(), } } @@ -111,12 +117,20 @@ impl RegistryRepo { return Some(self.registry.path().to_path_buf()); } let manifest_path = self.registry.path().join(REGISTRY_MANIFEST); + let legacy_path = self.registry.path().join(LEGACY_REGISTRY_MANIFEST); if manifest_path.exists() { log_info(format!( "Found registry manifest: {}", manifest_path.display() )); Some(manifest_path) + } else if legacy_path.exists() { + log_warn(format!( + "Found registry manifest: {}. Please rename file to {}, as the old name is deprecated and won't be supported in future versions.", + legacy_path.display(), + REGISTRY_MANIFEST + )); + Some(legacy_path) } else { log_info(format!( "No registry manifest found: {}", @@ -125,6 +139,12 @@ impl RegistryRepo { None } } + + /// Returns the registry schema URL, if available in the manifest. 
+ #[must_use] + pub fn schema_url(&self) -> Option { + self.manifest.as_ref().and_then(|manifest| manifest.schema_url.clone()) + } } #[cfg(test)] @@ -171,9 +191,9 @@ mod tests { let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; - assert_eq!(manifest.name, "resolved"); + assert_eq!(manifest.name(), "resolved"); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() @@ -190,7 +210,7 @@ mod tests { }; let repo = RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() @@ -204,7 +224,7 @@ mod tests { }; let repo = RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index eb2ca0198..c4fc26958 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -4,4 +4,4 @@ description: Test repository that has been resolved. 
version: 3.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index 1dc1d84e7..aa8518c7b 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -4,4 +4,4 @@ description: Test repository that has been resolved. version: 1.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_1.0.0.yaml +resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 681fa6400..91bbc3a6b 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -4,4 +4,4 @@ description: Test repository that has been resolved. 
version: 2.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 +resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 From c9fdf1cecb9f0f0992ab6f4f7285cea8aa686e58 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Fri, 13 Feb 2026 18:11:40 -0800 Subject: [PATCH 08/27] another checkpoint --- crates/weaver_codegen_test/build.rs | 4 +- crates/weaver_emit/src/lib.rs | 3 +- crates/weaver_forge/src/lib.rs | 15 +- crates/weaver_forge/src/v2/registry.rs | 14 +- crates/weaver_live_check/src/live_checker.rs | 14 +- crates/weaver_mcp/src/service.rs | 3 +- crates/weaver_resolved_schema/src/lib.rs | 47 ++-- crates/weaver_resolved_schema/src/v2/mod.rs | 27 +-- .../weaver_resolved_schema/src/v2/registry.rs | 6 - .../registry_a/registry_manifest.yaml | 2 +- .../registry_b/registry_manifest.yaml | 4 +- .../app_registry/registry_manifest.yaml | 2 +- .../custom_registry/registry_manifest.yaml | 2 +- .../expected-registry.json | 4 +- .../published/resolved_schema.yaml | 4 +- .../registry/registry_manifest.yaml | 2 +- crates/weaver_resolver/src/attribute.rs | 2 +- crates/weaver_resolver/src/dependency.rs | 12 +- crates/weaver_resolver/src/error.rs | 19 +- crates/weaver_resolver/src/lib.rs | 39 ++-- crates/weaver_resolver/src/loader.rs | 69 +++--- crates/weaver_resolver/src/registry.rs | 10 +- crates/weaver_search/src/lib.rs | 3 +- crates/weaver_semconv/src/manifest.rs | 201 +++++++++++++----- crates/weaver_semconv/src/registry.rs | 18 +- crates/weaver_semconv/src/registry_repo.rs | 112 +++++++--- .../3.0.0/registry_manifest.yaml | 3 +- .../tests/published_repository/resolved/1.0.0 | 3 +- .../tests/published_repository/resolved/2.0.0 | 3 +- .../weaver_semconv_gen/data_v2/templates.md | 4 +- crates/weaver_semconv_gen/src/v1.rs | 
2 +- crates/weaver_semconv_gen/src/v2.rs | 5 +- src/registry/check.rs | 2 +- src/registry/diff.rs | 9 +- src/registry/resolve.rs | 3 +- src/serve/handlers.rs | 2 +- src/serve/types.rs | 4 +- src/weaver.rs | 3 +- tests/custom_registry/registry_manifest.yaml | 2 +- tests/registry_stats.rs | 6 +- tests/resolution_process.rs | 11 +- 41 files changed, 447 insertions(+), 253 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 7681d0deb..8183376e2 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = - RegistryRepo::try_new("main", ®istry_path).unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path) + .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index 1f75880ef..1c8477d0f 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -268,6 +268,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, Examples, PrimitiveOrArrayTypeSpec, RequirementLevel}, group::{GroupType, InstrumentSpec, SpanKindSpec}, + manifest::SchemaUrl, stability::Stability, }; @@ -590,7 +591,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - registry_url: "TEST_V2".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![], attribute_groups: vec![], diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 47b673ed6..f432ed2f2 100644 --- a/crates/weaver_forge/src/lib.rs +++ 
b/crates/weaver_forge/src/lib.rs @@ -856,11 +856,12 @@ mod tests { ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { let registry_id = "default"; + let registry_version = "1.0.0"; let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1060,8 +1061,9 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let registry_version = "1.0.0"; + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1190,8 +1192,9 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let registry_version = "1.0.0"; + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 7c9595756..cd219d683 100644 --- 
a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -3,6 +3,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog}; +use weaver_semconv::manifest::SchemaUrl; use crate::{ error::Error, @@ -24,8 +25,7 @@ use crate::{ #[serde(deny_unknown_fields)] pub struct ForgeResolvedRegistry { /// The semantic convention registry url. - #[serde(skip_serializing_if = "String::is_empty")] - pub registry_url: String, + pub schema_url: SchemaUrl, // TODO - Attribute Groups /// The signals defined in this registry. pub registry: Registry, @@ -413,7 +413,7 @@ impl ForgeResolvedRegistry { } Ok(Self { - registry_url: schema.schema_url.clone(), + schema_url: schema.schema_url.clone(), registry: Registry { attributes, attribute_groups, @@ -448,8 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("https://example.com/schema".to_owned()), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -457,7 +456,6 @@ mod tests { common: CommonFields::default(), }], registry: v2::registry::Registry { - registry_url: "https://example.com/registry".to_owned(), attributes: vec![attribute::AttributeRef(0)], spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), @@ -613,11 +611,9 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("https://example.com/schema".to_owned()), attribute_catalog: vec![], registry: v2::registry::Registry { - 
registry_url: "https://example.com/registry".to_owned(), attributes: vec![], // No attributes - This is the logic bug. spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 0ad3955f8..f73b47e00 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -185,7 +185,6 @@ mod tests { span::{Span as V2Span, SpanAttribute}, }; use weaver_resolved_schema::attribute::Attribute; - use weaver_semconv::v2::{span::SpanName, CommonFields}; use weaver_semconv::{ attribute::{ AttributeType, BasicRequirementLevelSpec, EnumEntriesSpec, Examples, @@ -195,6 +194,10 @@ mod tests { stability::Stability, YamlValue, }; + use weaver_semconv::{ + manifest::SchemaUrl, + v2::{span::SpanName, CommonFields}, + }; fn get_all_advice(sample: &mut Sample) -> &mut [PolicyFinding] { match sample { @@ -508,7 +511,7 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ V2Attribute { @@ -794,7 +797,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_METRICS".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -1002,8 +1005,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), - + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1517,7 +1519,7 @@ mod tests { }; VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_EVENTS".to_owned(), + schema_url: 
SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 5f945310f..ea4fc2617 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -386,13 +386,14 @@ mod tests { use weaver_search::SearchType; use weaver_semconv::attribute::AttributeType; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index bc7e371a4..655ddbb21 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::GroupType; -use weaver_semconv::manifest::RegistryManifest; +use weaver_semconv::manifest::{RegistryManifest, SchemaUrl}; use weaver_version::schema_changes::{SchemaChanges, SchemaItemChange, SchemaItemType}; use weaver_version::Versions; @@ -51,10 +51,8 @@ pub(crate) const V2_RESOLVED_FILE_FORMAT: &str = "resolved/2.0.0"; pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, - /// Schema URL that this file is published at. - pub schema_url: String, - /// The ID of the registry that this schema belongs to. - pub registry_id: String, + /// Schema URL that this file is or will be published at. 
+ pub schema_url: SchemaUrl, /// The registry that this schema belongs to. pub registry: Registry, /// Catalog of unique items that are shared across multiple registries @@ -79,7 +77,8 @@ pub struct ResolvedTelemetrySchema { #[serde(skip_serializing_if = "Option::is_none")] pub versions: Option, /// The manifest of the registry. - pub registry_manifest: Option, + #[serde(skip)] + pub manifest: Option, } /// Statistics on a resolved telemetry schema. @@ -94,11 +93,11 @@ pub struct Stats { impl ResolvedTelemetrySchema { /// Create a new resolved telemetry schema. - pub fn new>(schema_url: S, registry_id: S, registry_url: S) -> Self { + pub fn new>(schema_url: S, registry_url: S) -> Self { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: schema_url.as_ref().to_owned(), - registry_id: registry_id.as_ref().to_owned(), + // TODO: is it correct? + schema_url: SchemaUrl(schema_url.as_ref().to_owned()), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, @@ -539,7 +538,7 @@ mod tests { #[test] fn no_diff() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "group1", [ @@ -556,7 +555,7 @@ mod tests { #[test] fn detect_2_added_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -565,7 +564,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -584,7 +583,7 @@ mod tests { #[test] fn detect_2_deprecated_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); 
prior_schema.add_attribute_group( "registry.group1", [ @@ -598,7 +597,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -645,7 +644,7 @@ mod tests { #[test] fn detect_2_renamed_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -659,7 +658,7 @@ mod tests { // 2 new attributes are added: attr2_bis and attr3_bis // attr2 is renamed attr2_bis // attr3 is renamed attr3_bis - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -693,7 +692,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_existing_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -705,7 +704,7 @@ mod tests { ); prior_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -732,7 +731,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_new_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -743,7 +742,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = 
ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -776,7 +775,7 @@ mod tests { /// However, detecting this case is useful for identifying a violation of the process. #[test] fn detect_2_removed_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -787,7 +786,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -805,9 +804,9 @@ mod tests { // TODO add many more group diff checks for various capabilities. #[test] fn detect_metric_name_change() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "test/base_version", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); prior_schema.add_metric_group("metrics.cpu.time", "cpu.time", [], None); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "test/new_version", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); latest_schema.add_metric_group( "metrics.cpu.time", "cpu.time", diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 28e2d7cc1..7acc570e4 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::RegistryManifest, + manifest::{RegistryManifest, SchemaUrl}, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -50,9 +50,7 @@ pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, /// Schema URL that this file is published at. 
- pub schema_url: String, - /// The ID of the registry that this schema belongs to. - pub registry_id: String, + pub schema_url: SchemaUrl, /// Catalog of attributes. Note: this will include duplicates for the same key. pub attribute_catalog: Vec, /// The registry that this schema belongs to. @@ -60,8 +58,8 @@ pub struct ResolvedTelemetrySchema { /// Refinements for the registry pub refinements: Refinements, /// The manifest of the registry. - #[serde(skip_serializing)] - pub registry_manifest: Option, + #[serde(skip)] + pub manifest: Option, } impl ResolvedTelemetrySchema { @@ -131,7 +129,6 @@ impl TryFrom for ResolvedTelemetrySchema { Ok(ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), schema_url: value.schema_url, - registry_id: value.registry_id, attribute_catalog, registry, refinements, @@ -505,7 +502,6 @@ pub fn convert_v1_to_v2( } let v2_registry = Registry { - registry_url: r.registry_url, attributes, spans, metrics, @@ -989,11 +985,10 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "my.schema.url".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "my.schema.url".to_owned(), + registry_url: "http://test/schemas/1.0".to_owned(), groups: vec![], }, instrumentation_library: None, @@ -1007,8 +1002,10 @@ mod tests { assert!(v2_schema.is_ok()); let v2_schema = v2_schema.unwrap(); assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); - assert_eq!(v2_schema.schema_url, "my.schema.url"); - assert_eq!(v2_schema.registry_id, "my-registry"); + assert_eq!( + v2_schema.schema_url, + SchemaUrl("http://test/schemas/1.0.0".to_owned()) + ); } #[test] @@ -1216,13 +1213,11 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema 
{ file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "my.schema.url".to_owned(), - registry_id: "main".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0".to_owned()), attribute_catalog: vec![], registry: Registry { attributes: vec![], attribute_groups: vec![], - registry_url: "todo".to_owned(), spans: vec![], metrics: vec![], events: vec![], diff --git a/crates/weaver_resolved_schema/src/v2/registry.rs b/crates/weaver_resolved_schema/src/v2/registry.rs index bdea11ce1..147977427 100644 --- a/crates/weaver_resolved_schema/src/v2/registry.rs +++ b/crates/weaver_resolved_schema/src/v2/registry.rs @@ -35,11 +35,6 @@ pub struct Registry { /// Catalog of (public) attribute groups. pub attribute_groups: Vec, - /// The semantic convention registry url. - /// - /// This is the base URL, under which this registry can be found. - pub registry_url: String, - /// A list of span signal definitions. pub spans: Vec, @@ -267,7 +262,6 @@ mod test { }]; let registry = Registry { attribute_groups: vec![], - registry_url: "https://opentelemetry.io/schemas/1.23.0".to_owned(), spans: vec![Span { r#type: "test.span".to_owned().into(), kind: SpanKindSpec::Client, diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml index 0fcd0ce52..8e5b52e46 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml @@ -3,5 +3,5 @@ description: Test registry A for circular dependency testing. 
semconv_version: 0.1.0 schema_base_url: https://example.com/registry_a/schemas/ dependencies: - - name: registry_b + - schema_url: https://example.com/registry_b/schemas/1.0.0 registry_path: data/circular-registry-test/registry_b \ No newline at end of file diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index d09a81eb1..f50389496 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -3,5 +3,7 @@ description: Test registry B for circular dependency testing. semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - - name: registry_a + # TODO: support legacy name-based dependencies as well (with warning) + #- name: registry_a + - schema_url: https://example.com/registry_a/schemas/1.0.0 registry_path: data/circular-registry-test/registry_a \ No newline at end of file diff --git a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml index d9cf26bc9..be13985a8 100644 --- a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the App. 
semconv_version: 0.1.0 schema_base_url: https://app.com/schemas/ dependencies: - - name: acme + - schema_url: https://acme.com/schemas/0.1.0 registry_path: data/multi-registry/custom_registry diff --git a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml index 711eb37f4..8e0a5081b 100644 --- a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: data/multi-registry/otel_registry diff --git a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json index 0f2def153..e0b0e9af3 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json +++ b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json @@ -58,12 +58,12 @@ "name": "my-span", "lineage": { "provenance": { - "registry_id": "acme", + "registry_id": "acme.com/schemas", "path": "data/registry-test-published-1/registry/main.yaml" }, "attributes": { "a": { - "source_group": "v2_dependency.published", + "source_group": "v2_dependency.opentelemetry.io/schemas", "inherited_fields": [ "annotations", "brief", diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml index 189cdb46f..96103a4f1 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml +++ 
b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml @@ -1,13 +1,11 @@ file_format: resolved/2.0.0 -schema_url: http://todo -registry_id: published +schema_url: https://opentelemetry.io/schemas/1.0.0 attribute_catalog: - key: a type: string brief: test a stability: stable registry: - registry_url: todo-why? attributes: - 0 attribute_groups: diff --git a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml index ad5d44275..d48e2ab4e 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: published + - schema_url: https://example.com/schemas/1.2.3 registry_path: data/registry-test-published-1/published diff --git a/crates/weaver_resolver/src/attribute.rs b/crates/weaver_resolver/src/attribute.rs index 99f782b3f..908101c7f 100644 --- a/crates/weaver_resolver/src/attribute.rs +++ b/crates/weaver_resolver/src/attribute.rs @@ -321,7 +321,7 @@ impl AttributeLookup for V1Schema { impl AttributeLookup for V2Schema { fn lookup_attribute(&self, key: &str) -> Option { - let fake_group_id = format!("v2_dependency.{}", self.registry_id); + let fake_group_id = format!("v2_dependency.{}", self.schema_url.name()); self.attribute_catalog.iter().find_map(|attr| { if attr.key == key { Some(AttributeWithGroupId { diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 24ead7e59..7345d14ba 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -167,7 +167,7 @@ impl ImportableDependency for V2Schema { for ar in 
m.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -214,7 +214,7 @@ impl ImportableDependency for V2Schema { for ar in e.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -262,7 +262,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -276,7 +276,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().clone(), attribute_ref: ar.base.0, }, )?; @@ -439,6 +439,7 @@ mod tests { use itertools::Itertools; use std::error::Error; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; + use weaver_semconv::manifest::SchemaUrl; use crate::dependency::{ResolvedDependency, UnresolvedAttributeLookup}; @@ -470,8 +471,7 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: "v1-example".to_owned(), - registry_id: "v1-example".to_owned(), + schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: vec![weaver_resolved_schema::registry::Group { diff --git a/crates/weaver_resolver/src/error.rs b/crates/weaver_resolver/src/error.rs index f2e01eb11..e8d2add5a 
100644 --- a/crates/weaver_resolver/src/error.rs +++ b/crates/weaver_resolver/src/error.rs @@ -19,21 +19,26 @@ pub enum Error { FailToResolveDefinition(#[from] weaver_semconv::Error), /// We discovered a circular dependency we cannot resolve. - #[error("Circular dependency detected: registry '{registry_id}' depends on itself through the chain: {chain}")] + #[error("Circular dependency detected: registry '{registry_name}' depends on itself through the chain: {chain}")] CircularDependency { /// The registry that depends on itself. - registry_id: String, + registry_name: String, + /// A string representing the dependency chain. chain: String, }, /// We've reached the maximum dependency depth for this registry. - #[error("Maximum dependency depth reached for registry `{registry}`. Cannot load further dependencies.")] + #[error("Maximum dependency depth reached for registry `{registry_name}`. Cannot load further dependencies.")] MaximumDependencyDepth { /// The registry which has too many dependencies. - registry: String, + registry_name: String, }, + /// Failed to resolve the schema URL for a registry. + #[error("Schema URL is missing in the manifest and cannot be constructed from the registry name and version.")] + FailToResolveSchemaUrl {}, + /// An invalid URL. #[error("Invalid URL `{url:?}`, error: {error:?})")] #[diagnostic(help("Check the URL and try again."))] @@ -168,10 +173,12 @@ pub enum Error { }, /// We - #[error("Invalid registry: {registry_id}. Unable to find attribute by index: {attribute_ref}")] + #[error( + "Invalid registry: {registry_name}. Unable to find attribute by index: {attribute_ref}" + )] InvalidRegistryAttributeRef { /// The registry with the issue. - registry_id: String, + registry_name: String, /// The attribute index that does not exist in the registry. 
attribute_ref: u32, }, diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 940dc104a..587e8ddd9 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,6 +3,7 @@ #![doc = include_str!("../README.md")] use weaver_semconv::group::ImportsWithProvenance; +use weaver_semconv::manifest::SchemaUrl; use crate::attribute::AttributeCatalog; use crate::dependency::ResolvedDependency; @@ -88,8 +89,20 @@ impl SchemaResolver { WResult::FatalErr(e) => return WResult::FatalErr(e), } } - let registry_id: String = repo.id().to_string(); let manifest = repo.manifest().cloned(); + let schema_url = if let Some(m) = manifest.as_ref() { + match m.schema_url.clone() { + Some(url) => url, + None => { + return WResult::FatalErr(Error::FailToResolveSchemaUrl {}); + } + } + } else { + match SchemaUrl::from_name_version(&repo.name(), &repo.version()) { + Ok(url) => url, + Err(_) => return WResult::FatalErr(Error::FailToResolveSchemaUrl {}), + } + }; let mut attr_catalog = AttributeCatalog::default(); // TODO - Do something with non_fatal_errors if we need to. 
resolve_registry_with_dependencies( @@ -105,8 +118,7 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: "".to_owned(), - registry_id, + schema_url: schema_url, registry: resolved_registry, catalog, resource: None, @@ -235,7 +247,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -249,7 +261,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, None, ®istry_path)?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { @@ -263,19 +275,22 @@ mod tests { ); // Verify we have specs from all three registries - let registry_ids = loaded.registry_ids(); + let registry_names = loaded.registry_names(); assert!( - registry_ids.contains(&"app".to_owned()), - "Missing app registry specs" + registry_names.contains(&"app.com/schemas".to_owned()), + "Missing app registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"acme".to_owned()), - "Missing acme registry specs" + registry_names.contains(&"acme.com/schemas".to_owned()), + "Missing acme registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"otel".to_owned()), - "Missing otel registry specs" + registry_names.contains(&"opentelemetry.io/schemas".to_owned()), + "Missing otel registry specs, available registries: {:?}", + registry_names ); // Now test the resolved 
registry content diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 7d1e8af29..34190c5d0 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -14,7 +14,7 @@ use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; use weaver_semconv::json_schema::JsonSchemaValidator; -use weaver_semconv::registry_repo::{LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST, RegistryRepo}; +use weaver_semconv::registry_repo::{RegistryRepo, LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; use crate::Error; @@ -48,9 +48,11 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new("default", &path).map_err(|e| Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), + let repo = RegistryRepo::try_new(Some("default"), Some("1.0.0"), &path).map_err(|e| { + Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), + } })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) @@ -79,8 +81,8 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.0, + LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.0, } } @@ -104,19 +106,19 @@ impl LoadedSemconvRegistry { /// Returns all the registry ids in this loaded registry and its dependencies. #[cfg(test)] #[must_use] - pub fn registry_ids(&self) -> Vec { + pub fn registry_names(&self) -> Vec { match self { LoadedSemconvRegistry::Unresolved { repo, dependencies, .. } => { - let mut result = vec![repo.id().to_string()]; + let mut result = vec![repo.name().to_string()]; for d in dependencies { - result.extend(d.registry_ids()); + result.extend(d.registry_names()); } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.clone()], - LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.registry_id.clone()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().clone()], + LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().clone()], } } } @@ -132,11 +134,11 @@ impl Display for LoadedSemconvRegistry { } => write!( f, "{} - [{}]", - repo.id(), + repo.schema_url(), dependencies.iter().map(|d| format!("{d}")).join(",") ), - LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.registry_id), - LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.registry_id), + LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.schema_url), + LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.schema_url), } } } @@ -173,22 +175,22 @@ fn load_semconv_repository_recursive( // Make sure we don't go past our max dependency depth. 
if max_dependency_depth == 0 { return WResult::FatalErr(Error::MaximumDependencyDepth { - registry: registry_repo.registry_path_repr().to_owned(), + registry_name: registry_repo.registry_path_repr().to_owned(), }); } - let registry_id = registry_repo.id().to_string(); + let registry_name = registry_repo.name().to_string(); // Check for circular dependency - if visited_registries.contains(®istry_id) { - dependency_chain.push(registry_id.clone()); + if visited_registries.contains(®istry_name) { + dependency_chain.push(registry_name.clone()); let chain_str = dependency_chain.join(" → "); return WResult::FatalErr(Error::CircularDependency { - registry_id, + registry_name: registry_name.clone(), chain: chain_str, }); } // Add current registry to visited set and dependency chain - let _ = visited_registries.insert(registry_id.clone()); - dependency_chain.push(registry_id.clone()); + let _ = visited_registries.insert(registry_name.clone()); + dependency_chain.push(registry_name.clone()); // Either load a fully resolved repository, or read in raw files. if let Some(manifest) = registry_repo.manifest() { @@ -202,7 +204,18 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new(&d.name, &d.registry_path) { + let registry_path = d.registry_path.clone().unwrap_or_else(|| { + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: d.schema_url.to_string(), + sub_folder: None, + } + }); + match RegistryRepo::try_new( + Some(&d.schema_url.name()), + Some(&d.schema_url.version()), + ®istry_path, + ) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. match load_semconv_repository_recursive( @@ -305,7 +318,7 @@ fn load_definition_repository( // TODO - less confusing way to load semconv specs. 
vec![SemConvRegistry::semconv_spec_from_file( - ®istry_repo.id(), + ®istry_repo.name(), entry.path(), &unversioned_validator, &versioned_validator, @@ -398,7 +411,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -410,7 +423,7 @@ mod tests { dependencies, } = loaded { - assert_eq!("acme", repo.id().as_ref()); + assert_eq!("acme.com/schemas", repo.name().as_ref()); assert_eq!(dependencies.len(), 1); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 1); @@ -421,7 +434,7 @@ mod tests { dependencies, }] = &dependencies.as_slice() { - assert_eq!("otel", repo.id().as_ref()); + assert_eq!("opentelemetry.io/schemas", repo.name().as_ref()); assert_eq!(dependencies.len(), 0); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 0); @@ -440,7 +453,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("app"), Some("1.0.0"), ®istry_path)?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -475,7 +488,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new("registry_a", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("registry_a"), Some(""), ®istry_path)?; let result = load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/registry.rs 
b/crates/weaver_resolver/src/registry.rs index 3b6ae70a3..e1efbf897 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -918,12 +918,14 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let registry_id = "default"; + let registry_name = "default"; + let registry_version = "0.1.0"; let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(registry_id, &location).expect("Failed to load registry"), + RegistryRepo::try_new(Some(registry_name), Some(registry_version), &location) + .expect("Failed to load registry"), true, ) .ignore(|e| { @@ -1105,6 +1107,7 @@ groups: #[test] fn test_api_usage() -> Result<(), Box> { let registry_id = "local"; + let registry_version = "1.0.0"; // Load a semantic convention registry from a local directory. 
// Note: A method is also available to load a registry from a git @@ -1113,7 +1116,8 @@ groups: let path = VirtualDirectoryPath::LocalFolder { path: "data/registry-test-7-spans/registry".to_owned(), }; - let repo = RegistryRepo::try_new(registry_id, &path)?; + + let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 8dcd39eb7..70a2a1ace 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -478,6 +478,7 @@ mod tests { use weaver_semconv::attribute::AttributeType; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; @@ -543,7 +544,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 548d54cf9..f3f82721f 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -12,11 +12,94 @@ use crate::stability::Stability; use crate::Error; use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::path::PathBuf; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; +/// Represents the schema URL of a registry, which serves as a unique 
identifier for the registry +/// along with its version. +#[derive(Debug, Clone, PartialEq, Eq, Hash, JsonSchema)] +pub struct SchemaUrl(pub String); + +impl SchemaUrl { + /// Validate the schema URL format. + pub fn validate(&self) -> Result<(), String> { + let parsed = url::Url::parse(&self.0).map_err(|e| format!("Invalid schema URL: {e}"))?; + if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + return Err("The schema URL must have at least one path segment.".to_owned()); + } + Ok(()) + } + + /// Returns the registry name, derived from the schema URL. + pub fn name(&self) -> String { + let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + + if segments.is_empty() { + return parsed_url.authority().to_string(); + } + + format!("{}/{}", parsed_url.authority(), segments.join("/")) + } + + /// Returns the registry version, derived from the schema URL. + pub fn version(&self) -> String { + let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_string() + } + + /// Create a SchemaUrl from name and version. 
+ pub fn from_name_version(name: &str, version: &str) -> Result { + let schema_url_str; + // TODO: replace with scheme regex + if name.starts_with("http://") || name.starts_with("https://") { + schema_url_str = format!("{}/{}", name.trim_end_matches('/'), version); + } else { + schema_url_str = format!("https://{}/{}", name.trim_end_matches('/'), version); + } + let schema_url = SchemaUrl(schema_url_str); + schema_url.validate()?; + Ok(schema_url) + } +} + +impl std::fmt::Display for SchemaUrl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl<'de> Deserialize<'de> for SchemaUrl { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Ok(SchemaUrl(s)) + } +} + +impl Serialize for SchemaUrl { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.0) + } +} + /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema @@ -34,7 +117,7 @@ pub struct RegistryManifest { /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: /// `http[s]://server[:port]/path/`. /// See https://github.com/open-telemetry/opentelemetry-specification/blob/v1.53.0/specification/schemas/README.md#schema-url for more details. - pub schema_url: Option, + pub schema_url: Option, /// An optional description of the registry. /// @@ -73,17 +156,25 @@ pub struct RegistryManifest { pub resolved_schema_uri: Option, } -/// Represents a dependency of a semantic convention registry. +/// Represents a dependency of a semantic convention registry as defined in YAML. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] pub struct Dependency { - /// The name of the dependency. - pub name: String, - /// The path to the dependency. + /// The schema URL for the dependency (required). 
+ /// It must follow OTel schema URL format, which is: `http[s]://server[:port]/path/`. + /// This is not necessarily the URL registry can be accessed at, but it provides + /// a unique identifier for the dependency registry and its version. /// + /// When registry is not published yet, this field should be populated with a placeholder URL, + /// but it must follow the URL format and include a version segment. + /// The actual registry files can be provided in `registry_path` field. + pub schema_url: SchemaUrl, + + /// The path to the dependency (optional). /// This can be either: /// - A manifest of a published registry /// - A directory containing the raw definition. - pub registry_path: VirtualDirectoryPath, + #[serde(skip_serializing_if = "Option::is_none")] + pub registry_path: Option, } impl RegistryManifest { @@ -115,11 +206,16 @@ impl RegistryManifest { // If the schema URL is not provided, populate it using deprecated schema_base_url and semconv_version // validation would fail if they were not provided if manifest.schema_url.is_none() { - manifest.schema_url = Some(format!( - "{}/{}", - manifest.schema_base_url.clone().unwrap_or_default(), - manifest.semconv_version.clone().unwrap_or_default() - )); + manifest.schema_url = Some( + SchemaUrl::from_name_version( + &manifest.schema_base_url.clone().unwrap_or_default(), + &manifest.semconv_version.clone().unwrap_or_default(), + ) + .map_err(|e| InvalidRegistryManifest { + path: manifest_path_buf.clone(), + error: e, + })?, + ); } Ok(manifest) @@ -128,43 +224,52 @@ impl RegistryManifest { fn validate(&self, path: PathBuf) -> Result<(), Error> { let mut errors = vec![]; - let schema_url_empty = self.schema_url.as_ref().map_or(true, |url| url.is_empty()); - let schema_base_url_empty = self.schema_base_url.as_ref().map_or(true, |url| url.is_empty()); - let semconv_version_empty = self.semconv_version.as_ref().map_or(true, |v| v.is_empty()); - - if schema_url_empty { - if schema_base_url_empty || 
semconv_version_empty { + if self.schema_url.is_none() { + if self.schema_base_url.is_none() || self.semconv_version.is_none() { errors.push(InvalidRegistryManifest { path: path.clone(), error: "The registry schema URL is required.".to_owned(), }); } else { - // schema_base_url should be a valid absolute URL, otherwise push an error to the list. - if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { + if self + .schema_base_url + .as_ref() + .map_or(true, |url| url.is_empty()) + { errors.push(InvalidRegistryManifest { path: path.clone(), - error: format!("Invalid schema base URL: {}", e), + error: "The registry schema base URL is required.".to_owned(), }); - } - } - } else { - // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment - match url::Url::parse(self.schema_url.as_ref().unwrap()) { - Ok(parsed_url) => { - if parsed_url.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { + } else { + if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { errors.push(InvalidRegistryManifest { path: path.clone(), - error: "The registry schema URL must have at least one path segment.".to_owned(), + error: format!("Invalid schema base URL: {}", e), }); } } - Err(e) => { + + if self + .semconv_version + .as_ref() + .map_or(true, |version| version.is_empty()) + { errors.push(InvalidRegistryManifest { path: path.clone(), - error: format!("Invalid schema URL: {}", e), + error: "The registry version is required.".to_owned(), }); } } + } else { + // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment + if let Some(url) = self.schema_url.as_ref() { + url.validate().unwrap_or_else(|e| { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema URL: {}", e), + }); + }); + } } handle_errors(errors)?; @@ -175,29 +280,21 @@ impl RegistryManifest { /// For example, if the schema URL is 
`https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` pub fn name(&self) -> String { - let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); - let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); - let authority = parsed_url.host_str().unwrap_or_default(); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } - format!("{}/{}", authority, segments.join("/")) + self.schema_url + .as_ref() + .map(|url| url.name()) + .unwrap_or_default() + .to_string() } /// Returns the registry version, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry version would be `1.0.0` pub fn version(&self) -> String { - let schema_url = self.schema_url.as_ref().expect("schema_url was validated"); - let parsed_url = url::Url::parse(schema_url).expect("schema_url was validated"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") + self.schema_url + .as_ref() + .map(|url| url.version()) + .unwrap_or_default() .to_string() } } @@ -230,7 +327,7 @@ mod tests { let config = RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") .expect("Failed to load the registry configuration file."); - assert_eq!(config.name(), "vendor_acme"); + assert_eq!(config.name(), "acme.com/schemas"); assert_eq!(config.version(), "0.1.0"); } @@ -244,16 +341,12 @@ mod tests { let expected_errs = CompoundError(vec![ InvalidRegistryManifest { path: path.clone(), - error: "The registry name is required.".to_owned(), + error: "The registry schema base URL is required.".to_owned(), }, InvalidRegistryManifest { path: path.clone(), error: "The registry version is required.".to_owned(), }, - InvalidRegistryManifest { - path: 
path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }, ]); if let Err(observed_errs) = result { diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 7e33fa5b2..11251cd95 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -125,7 +125,7 @@ impl SemConvRegistry { LazyLock::new(|| Regex::new(r".*(v\d+\.\d+\.\d+).*").expect("Invalid regex")); // Load all the semantic convention registry. - let mut registry = SemConvRegistry::new(registry_repo.id().as_ref()); + let mut registry = SemConvRegistry::new(registry_repo.name().as_ref()); for spec in semconv_specs { registry.add_semconv_spec(spec); @@ -145,9 +145,16 @@ impl SemConvRegistry { registry.set_manifest(RegistryManifest { file_format: None, - schema_url: registry_repo.manifest().and_then(|m| Some(m.schema_url.clone())).unwrap_or_default(), - schema_base_url: registry_repo.manifest().and_then(|m| m.schema_base_url.clone()), - semconv_version: registry_repo.manifest().and_then(|m| m.semconv_version.clone()), + schema_url: registry_repo + .manifest() + .and_then(|m| Some(m.schema_url.clone())) + .unwrap_or_default(), + schema_base_url: registry_repo + .manifest() + .and_then(|m| m.schema_base_url.clone()), + semconv_version: registry_repo + .manifest() + .and_then(|m| m.semconv_version.clone()), description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], resolved_schema_uri: None, @@ -386,7 +393,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = RegistryRepo::try_new("test", ®istry_path).unwrap(); + let registry_repo = + RegistryRepo::try_new(Some("test"), Some("1.0.0"), ®istry_path).unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git 
a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index c91f89d57..8104b79bc 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -6,15 +6,13 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::RegistryManifest; +use crate::manifest::{RegistryManifest, SchemaUrl}; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; /// The name of the legacy registry manifest file. -#[deprecated( - note = "The registry manifest file is renamed to `manifest.yaml`." -)] +#[deprecated(note = "The registry manifest file is renamed to `manifest.yaml`.")] pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// The name of the registry manifest file. @@ -29,8 +27,11 @@ pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; /// that denotes where to find aspects of the registry. #[derive(Default, Debug, Clone)] pub struct RegistryRepo { - // A unique identifier for the registry (e.g. main, baseline, etc.) - id: Arc, + // A unique identifier for the registry (e.g. opentelemetry.io/schemas/sub-component). + name: Arc, + + // Registry version + version: Arc, // A virtual directory containing the registry. registry: VirtualDirectory, @@ -40,30 +41,73 @@ pub struct RegistryRepo { } impl RegistryRepo { - /// Creates a new `RegistryRepo` from a `RegistryPath` object that + /// Creates a new `RegistryRepo` from a name, version, and `RegistryPath` object that /// specifies the location of the registry. + /// If there is no manifest, name and version must be provided. 
pub fn try_new( - registry_id_if_no_manifest: &str, + name: Option<&str>, + version: Option<&str>, registry_path: &VirtualDirectoryPath, ) -> Result { - let mut registry_repo = Self { - id: Arc::from(registry_id_if_no_manifest), - registry: VirtualDirectory::try_new(registry_path) - .map_err(Error::VirtualDirectoryError)?, - manifest: None, - }; - if let Some(manifest) = registry_repo.manifest_path() { - let registry_manifest = RegistryManifest::try_from_file(manifest)?; - registry_repo.id = Arc::from(registry_manifest.name().as_str()); - registry_repo.manifest = Some(registry_manifest); + let registry = + VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; + let mut manifest = None; + let mut registry_name = None; + let mut registry_version = None; + // Try to load manifest + if let Some(manifest_path) = { + // We need a temporary RegistryRepo to call manifest_path + let temp_repo = Self { + name: Arc::from(""), + version: Arc::from(""), + registry: registry.clone(), + manifest: None, + }; + temp_repo.manifest_path() + } { + let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; + registry_name = Some(Arc::from(registry_manifest.name().as_str())); + registry_version = Some(Arc::from(registry_manifest.version().as_str())); + manifest = Some(registry_manifest); + } else { + // No manifest, require name and version + let name = name.ok_or_else(|| Error::InvalidRegistryManifest { + path: registry.path().to_path_buf(), + error: "Registry name must be provided if no manifest is present.".to_string(), + })?; + let version = version.ok_or_else(|| Error::InvalidRegistryManifest { + path: registry.path().to_path_buf(), + error: "Registry version must be provided if no manifest is present.".to_string(), + })?; + registry_name = Some(Arc::from(name)); + registry_version = Some(Arc::from(version)); + } + Ok(Self { + name: registry_name.unwrap(), + version: registry_version.unwrap(), + registry, + manifest, + }) + } + + /// Returns 
the registry name (from manifest if present, otherwise top-level field). + #[must_use] + pub fn name(&self) -> Arc { + if let Some(manifest) = &self.manifest { + Arc::from(manifest.name()) + } else { + self.name.clone() } - Ok(registry_repo) } - /// Returns the unique identifier for the registry. + /// Returns the registry version (from manifest if present, otherwise top-level field). #[must_use] - pub fn id(&self) -> Arc { - self.id.clone() + pub fn version(&self) -> Arc { + if let Some(manifest) = &self.manifest { + Arc::from(manifest.version()) + } else { + self.version.clone() + } } /// Returns the local path to the semconv registry. @@ -142,8 +186,14 @@ impl RegistryRepo { /// Returns the registry schema URL, if available in the manifest. #[must_use] - pub fn schema_url(&self) -> Option { - self.manifest.as_ref().and_then(|manifest| manifest.schema_url.clone()) + pub fn schema_url(&self) -> SchemaUrl { + // TODO: we should never have a registry without a schema URL at this point + // but not sure how to do it in terms of API design + // but for now we can just panic if we don't find a schema URL + self.manifest + .as_ref() + .and_then(|manifest| manifest.schema_url.clone()) + .expect("Schema URL must have been provided") } } @@ -167,7 +217,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new("main", ®istry_path).unwrap(); + let repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -186,8 +236,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test 
repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; @@ -208,8 +258,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", @@ -222,8 +272,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, None, ®istry_path) + .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index c4fc26958..0c752664c 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 3.0.0 +schema_url: http://resolved/3.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index aa8518c7b..b65691d98 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. -version: 1.0.0 +schema_url: http://resolved/1.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 91bbc3a6b..d8bf526e8 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 2.0.0 +schema_url: http://resolved/2.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 diff --git a/crates/weaver_semconv_gen/data_v2/templates.md b/crates/weaver_semconv_gen/data_v2/templates.md index 28b87f7eb..1d9e54184 100644 --- a/crates/weaver_semconv_gen/data_v2/templates.md +++ b/crates/weaver_semconv_gen/data_v2/templates.md @@ -22,8 +22,8 @@ test.common Custom Snippet Name - -todo/1.0.0 + +https://todo/1.0.0 diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 7579d61a8..27239e420 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -198,7 +198,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; let generator = SnippetGenerator::try_from_registry_repo( ®istry_repo, template, diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index a83adf9a8..ae852bb2e 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -413,6 +413,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, PrimitiveOrArrayTypeSpec}, group::InstrumentSpec, + manifest::SchemaUrl, v2::{span::SpanName, CommonFields}, }; @@ -451,8 +452,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: "todo/1.0.0".to_owned(), - registry_id: "main".to_owned(), + schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -466,7 +466,6 @@ mod tests 
{ attributes: vec![AttributeRef(0)], common: CommonFields::default(), }], - registry_url: "todo".to_owned(), spans: vec![Span { r#type: "trace.test".to_owned().into(), kind: weaver_semconv::group::SpanKindSpec::Client, diff --git a/src/registry/check.rs b/src/registry/check.rs index 0f2a981da..427ee42ec 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,7 +43,7 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result>) -> impl Into let registry = &state.registry; let stats = RegistryStats { - registry_url: registry.registry_url.clone(), + schema_url: registry.schema_url.to_string(), counts: RegistryCounts { attributes: registry.registry.attributes.len(), metrics: registry.registry.metrics.len(), diff --git a/src/serve/types.rs b/src/serve/types.rs index ed894c948..b750b9f10 100644 --- a/src/serve/types.rs +++ b/src/serve/types.rs @@ -10,8 +10,8 @@ use weaver_semconv::stability::Stability; /// Registry stats response. #[derive(Debug, Serialize, ToSchema)] pub struct RegistryStats { - /// The registry URL. - pub registry_url: String, + /// The schema URL. + pub schema_url: String, /// Counts of different entity types. 
pub counts: RegistryCounts, // TODO: It would be better to serve the output of `weaver registry stats` here diff --git a/src/weaver.rs b/src/weaver.rs index 19f018bbe..ed1423629 100644 --- a/src/weaver.rs +++ b/src/weaver.rs @@ -58,7 +58,8 @@ impl<'a> WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = RegistryRepo::try_new("main", registry_path)?; + let main_registry_repo = + RegistryRepo::try_new(Some("unknown"), Some("unknown"), registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/custom_registry/registry_manifest.yaml b/tests/custom_registry/registry_manifest.yaml index fd749cc86..ced966aa3 100644 --- a/tests/custom_registry/registry_manifest.yaml +++ b/tests/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.30.0.zip[model] diff --git a/tests/registry_stats.rs b/tests/registry_stats.rs index a05ae06dd..32514720e 100644 --- a/tests/registry_stats.rs +++ b/tests/registry_stats.rs @@ -18,5 +18,9 @@ fn test_cli_interface() { .output() .expect("failed to execute process"); - assert!(output.status.success()); + assert!( + output.status.success(), + "Process did not exit successfully. 
Stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 2e09f3afd..da1824745 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -33,9 +33,14 @@ fn test_cli_interface() { sub_folder: Some(SEMCONV_REGISTRY_MODEL.to_owned()), refspec: None, }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path).unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + + let registry_name = "opentelemetry.io/schemas"; + let registry_version = "1.40.0"; + let registry_repo = + RegistryRepo::try_new(Some(registry_name), Some(registry_version), ®istry_path) + .unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() From 9be8d84fae9ec964a0ec430646cd76123a4848ef Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 11:47:38 -0800 Subject: [PATCH 09/27] mostly ready --- crates/weaver_codegen_test/build.rs | 2 +- crates/weaver_forge/src/lib.rs | 16 +++--- crates/weaver_forge/src/v2/registry.rs | 6 +-- crates/weaver_resolved_schema/src/lib.rs | 6 +-- crates/weaver_resolved_schema/src/v2/mod.rs | 11 ++-- .../registry_b/registry_manifest.yaml | 5 +- crates/weaver_resolver/src/dependency.rs | 2 +- crates/weaver_resolver/src/lib.rs | 6 +-- crates/weaver_resolver/src/loader.rs | 21 ++------ crates/weaver_resolver/src/registry.rs | 12 ++--- crates/weaver_semconv/src/manifest.rs | 48 +++++++++++++++-- crates/weaver_semconv/src/registry.rs | 3 +- crates/weaver_semconv/src/registry_repo.rs | 52 +++++++++++-------- crates/weaver_semconv_gen/src/v1.rs | 2 +- crates/weaver_semconv_gen/src/v2.rs | 3 +- src/registry/check.rs | 2 +- src/registry/diff.rs | 5 +- src/registry/resolve.rs | 2 +- src/weaver.rs | 2 +- 
tests/resolution_process.rs | 7 ++- 20 files changed, 118 insertions(+), 95 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 8183376e2..5acd7ec84 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,7 +42,7 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path) + let registry_repo = RegistryRepo::try_new(None, ®istry_path) .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index f432ed2f2..44bd70069 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -841,6 +841,7 @@ mod tests { use weaver_common::vdir::VirtualDirectoryPath; use weaver_diff::diff_dir; use weaver_resolver::{LoadedSemconvRegistry, SchemaResolver}; + use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::registry_repo::RegistryRepo; use crate::config::{ApplicationMode, CaseConvention, Params, TemplateConfig, WeaverConfig}; @@ -855,12 +856,11 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let registry_id = "default"; - let registry_version = "1.0.0"; + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // 
SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); @@ -1057,12 +1057,11 @@ mod tests { }); engine.target_config.templates = Some(templates); - let registry_id = "default"; let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let registry_version = "1.0.0"; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() @@ -1188,12 +1187,11 @@ mod tests { #[test] fn test_comment_format() { - let registry_id = "default"; let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let registry_version = "1.0.0"; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path) + let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let repo = RegistryRepo::try_new(schema_url, &path) .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index cd219d683..b81d4e949 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -560,8 +560,7 @@ mod tests { common: CommonFields::default(), }, }], - }, - registry_manifest: None, + } }; let forge_registry = @@ -640,8 +639,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - }, - registry_manifest: None, + } }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index 655ddbb21..5d1d3b6f8 100644 --- 
a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -104,7 +104,7 @@ impl ResolvedTelemetrySchema { instrumentation_library: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, } } @@ -295,13 +295,13 @@ impl ResolvedTelemetrySchema { pub fn diff(&self, baseline_schema: &ResolvedTelemetrySchema) -> SchemaChanges { let mut changes = SchemaChanges::new(); - if let Some(ref manifest) = self.registry_manifest { + if let Some(ref manifest) = self.manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); } - if let Some(ref manifest) = baseline_schema.registry_manifest { + if let Some(ref manifest) = baseline_schema.manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 7acc570e4..23e2cdad4 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -57,9 +57,6 @@ pub struct ResolvedTelemetrySchema { pub registry: Registry, /// Refinements for the registry pub refinements: Refinements, - /// The manifest of the registry. 
- #[serde(skip)] - pub manifest: Option, } impl ResolvedTelemetrySchema { @@ -131,8 +128,7 @@ impl TryFrom for ResolvedTelemetrySchema { schema_url: value.schema_url, attribute_catalog, registry, - refinements, - registry_manifest: None, + refinements }) } } @@ -995,7 +991,7 @@ mod tests { resource: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, }; let v2_schema: Result = v1_schema.try_into(); @@ -1227,8 +1223,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - }, - registry_manifest: None, + } } } } diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index f50389496..f4d66d2e4 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -3,7 +3,6 @@ description: Test registry B for circular dependency testing. 
semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - # TODO: support legacy name-based dependencies as well (with warning) - #- name: registry_a - - schema_url: https://example.com/registry_a/schemas/1.0.0 + - name: registry_a + # schema_url: is not necessry here, we're using deprecated, but valid foe now `name` registry_path: data/circular-registry-test/registry_a \ No newline at end of file diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 7345d14ba..fda6d54a2 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -523,7 +523,7 @@ mod tests { instrumentation_library: None, dependencies: vec![], versions: None, - registry_manifest: None, + manifest: None, } } } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 587e8ddd9..98952d291 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -125,7 +125,7 @@ impl SchemaResolver { instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! 
- registry_manifest: manifest, + manifest: manifest, } }) } @@ -247,7 +247,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -261,7 +261,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 34190c5d0..6317a0450 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -48,7 +48,7 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new(Some("default"), Some("1.0.0"), &path).map_err(|e| { + let repo = RegistryRepo::try_new(None, &path).map_err(|e| { Error::InvalidUrl { url: "test string".to_owned(), error: format!("{e}"), @@ -204,18 +204,7 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - let registry_path = d.registry_path.clone().unwrap_or_else(|| { - // If no registry path is provided, we assume it's the same as the parent registry. 
- VirtualDirectoryPath::RemoteArchive { - url: d.schema_url.to_string(), - sub_folder: None, - } - }); - match RegistryRepo::try_new( - Some(&d.schema_url.name()), - Some(&d.schema_url.version()), - ®istry_path, - ) { + match RegistryRepo::try_new_dependency(&d) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. match load_semconv_repository_recursive( @@ -411,7 +400,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -453,7 +442,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("app"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -488,7 +477,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new(Some("registry_a"), Some(""), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let result = load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index e1efbf897..c2278e745 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -846,6 +846,7 @@ pub(crate) fn cleanup_and_stabilize_catalog_and_registry( mod tests { use rand::rng; use 
rand::seq::SliceRandom; + use weaver_semconv::manifest::SchemaUrl; use std::cmp::Ordering; use std::collections::HashMap; use std::error::Error; @@ -918,13 +919,12 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let registry_name = "default"; - let registry_version = "0.1.0"; + let schema_url = Some(SchemaUrl("https://default/0.1.0".to_owned())); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(Some(registry_name), Some(registry_version), &location) + RegistryRepo::try_new(schema_url, &location) .expect("Failed to load registry"), true, ) @@ -1106,9 +1106,6 @@ groups: #[test] fn test_api_usage() -> Result<(), Box> { - let registry_id = "local"; - let registry_version = "1.0.0"; - // Load a semantic convention registry from a local directory. // Note: A method is also available to load a registry from a git // repository. @@ -1117,7 +1114,8 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let repo = RegistryRepo::try_new(Some(registry_id), Some(registry_version), &path)?; + let schema_url = Some(SchemaUrl(format!("https://local/registry/1.0.0"))); + let repo = RegistryRepo::try_new(schema_url, &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index f3f82721f..f2a72eed9 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -128,14 +128,14 @@ pub struct RegistryManifest { pub description: Option, /// The version of the registry which will be used to define the semconv package version. 
- #[serde(skip_serializing_if = "Option::is_none", default)] + #[serde(default, skip_serializing)] #[deprecated( note = "The `version` field is deprecated. The registry version should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." )] pub semconv_version: Option, /// The base URL where the registry's schema files are hosted. - #[serde(skip_serializing_if = "Option::is_none", default)] + #[serde(default, skip_serializing)] #[deprecated( note = "The `schema_base_url` field is deprecated. The registry schema URL should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." )] @@ -157,7 +157,7 @@ pub struct RegistryManifest { } /// Represents a dependency of a semantic convention registry as defined in YAML. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Debug, Clone, JsonSchema)] pub struct Dependency { /// The schema URL for the dependency (required). /// It must follow OTel schema URL format, which is: `http[s]://server[:port]/path/`. @@ -175,6 +175,48 @@ pub struct Dependency { /// - A directory containing the raw definition. #[serde(skip_serializing_if = "Option::is_none")] pub registry_path: Option, + + /// This field is deprecated and should not be used. + /// The registry name should be derived from the `schema_url` field, + /// which serves as a unique identifier for the dependency registry + /// and includes registry version. + #[deprecated( + note = "The `name` field is deprecated. The registry name should be derived from the `schema_url` field, which serves as a unique identifier for the dependency registry." 
+ )] + #[serde(default, skip_serializing)] // we can read, but won't write this field + pub name: Option, +} + +impl<'de> Deserialize<'de> for Dependency { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct DependencyHelper { + name: Option, + schema_url: Option, + registry_path: Option, + } + + let helper = DependencyHelper::deserialize(deserializer)?; + + let schema_url = match (helper.schema_url, helper.name) { + (Some(url), _) => url, + (None, Some(name)) => SchemaUrl(format!("{}/unknown", name)), + (None, None) => { + return Err(serde::de::Error::custom( + "Either 'schema_url' or 'name' must be provided for a dependency" + )) + } + }; + + Ok(Dependency { + schema_url, + registry_path: helper.registry_path, + name: None, + }) + } } impl RegistryManifest { diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 11251cd95..2afbfb507 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -286,6 +286,7 @@ impl SemConvRegistry { mod tests { use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec}; use crate::group::{GroupSpec, GroupType}; + use crate::manifest::SchemaUrl; use crate::provenance::Provenance; use crate::registry::SemConvRegistry; use crate::registry_repo::RegistryRepo; @@ -394,7 +395,7 @@ mod tests { path: "data".to_owned(), }; let registry_repo = - RegistryRepo::try_new(Some("test"), Some("1.0.0"), ®istry_path).unwrap(); + RegistryRepo::try_new(Some(SchemaUrl("https://test/42".to_owned())), ®istry_path).unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 8104b79bc..2a5de63fe 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ 
b/crates/weaver_semconv/src/registry_repo.rs @@ -6,7 +6,7 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::{RegistryManifest, SchemaUrl}; +use crate::manifest::{Dependency, RegistryManifest, SchemaUrl}; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; @@ -41,12 +41,26 @@ pub struct RegistryRepo { } impl RegistryRepo { - /// Creates a new `RegistryRepo` from a name, version, and `RegistryPath` object that + /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. + pub fn try_new_dependency( + dependency: &Dependency, + ) -> Result { + let path = dependency.registry_path.clone().unwrap_or_else(|| { + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: dependency.schema_url.to_string(), + sub_folder: None, + } + }); + Self::try_new(Some(dependency.schema_url.clone()), &path) + } + + /// Creates a new `RegistryRepo` from a schema URL and `RegistryPath` object that /// specifies the location of the registry. - /// If there is no manifest, name and version must be provided. + /// If there is no manifest and schema URL is not provided, registry + /// name and version are set to "unknown". 
pub fn try_new( - name: Option<&str>, - version: Option<&str>, + schema_url: Option, registry_path: &VirtualDirectoryPath, ) -> Result { let registry = @@ -66,25 +80,17 @@ impl RegistryRepo { temp_repo.manifest_path() } { let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; - registry_name = Some(Arc::from(registry_manifest.name().as_str())); - registry_version = Some(Arc::from(registry_manifest.version().as_str())); + registry_name = Some(registry_manifest.name()); + registry_version = Some(registry_manifest.version()); manifest = Some(registry_manifest); } else { // No manifest, require name and version - let name = name.ok_or_else(|| Error::InvalidRegistryManifest { - path: registry.path().to_path_buf(), - error: "Registry name must be provided if no manifest is present.".to_string(), - })?; - let version = version.ok_or_else(|| Error::InvalidRegistryManifest { - path: registry.path().to_path_buf(), - error: "Registry version must be provided if no manifest is present.".to_string(), - })?; - registry_name = Some(Arc::from(name)); - registry_version = Some(Arc::from(version)); + registry_name = schema_url.as_ref().map(|url| url.name()).or(Some("unknown".to_owned())); + registry_version = schema_url.as_ref().map(|url| url.version()).or(Some("unknown".to_owned())); } Ok(Self { - name: registry_name.unwrap(), - version: registry_version.unwrap(), + name: registry_name.unwrap().into(), + version: registry_version.unwrap().into(), registry, manifest, }) @@ -217,7 +223,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path).unwrap(); + let repo = RegistryRepo::try_new(None, ®istry_path).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -236,7 +242,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: 
"tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); @@ -258,7 +264,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( @@ -272,7 +278,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, None, ®istry_path) + let repo = RegistryRepo::try_new(None, ®istry_path) .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 27239e420..4ce1e7943 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -198,7 +198,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new(Some("main"), Some("1.0.0"), ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; let generator = SnippetGenerator::try_from_registry_repo( ®istry_repo, template, diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index ae852bb2e..47ea0369c 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -568,8 +568,7 @@ mod tests { common: CommonFields::default(), }, }], - }, - registry_manifest: None, + } } } } diff --git a/src/registry/check.rs b/src/registry/check.rs index 
427ee42ec..6e525798f 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,7 +43,7 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result WeaverEngine<'a> { ) -> Result { let registry_path = &self.registry_config.registry; let main_registry_repo = - RegistryRepo::try_new(Some("unknown"), Some("unknown"), registry_path)?; + RegistryRepo::try_new(None, registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index da1824745..050472f81 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -6,7 +6,7 @@ use miette::Diagnostic; use weaver_common::vdir::VirtualDirectoryPath; use weaver_resolver::SchemaResolver; -use weaver_semconv::registry_repo::RegistryRepo; +use weaver_semconv::{manifest::SchemaUrl, registry_repo::RegistryRepo}; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -34,10 +34,9 @@ fn test_cli_interface() { refspec: None, }; - let registry_name = "opentelemetry.io/schemas"; - let registry_version = "1.40.0"; + let schema_url = Some(SchemaUrl("https://opelemetry.io/schemas/1.40.0".to_owned())); let registry_repo = - RegistryRepo::try_new(Some(registry_name), Some(registry_version), ®istry_path) + RegistryRepo::try_new(schema_url, ®istry_path) .unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From db3b09bf00f36dca3385728fa34b6a70bd27bf15 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 13:16:33 -0800 Subject: [PATCH 10/27] cleanup --- crates/weaver_codegen_test/build.rs | 4 +- crates/weaver_emit/src/lib.rs | 2 +- crates/weaver_forge/src/lib.rs | 18 +-- crates/weaver_forge/src/v2/registry.rs | 8 +- crates/weaver_live_check/src/advice/mod.rs | 12 +- crates/weaver_live_check/src/lib.rs | 6 +- crates/weaver_live_check/src/live_checker.rs | 28 ++-- 
crates/weaver_mcp/src/service.rs | 4 +- crates/weaver_resolved_schema/src/lib.rs | 2 +- crates/weaver_resolved_schema/src/v2/mod.rs | 12 +- crates/weaver_resolver/src/dependency.rs | 20 +-- crates/weaver_resolver/src/lib.rs | 8 +- crates/weaver_resolver/src/loader.rs | 20 ++- crates/weaver_resolver/src/registry.rs | 9 +- crates/weaver_search/src/lib.rs | 2 +- crates/weaver_semconv/src/manifest.rs | 138 ++++++++++++------- crates/weaver_semconv/src/registry.rs | 28 ++-- crates/weaver_semconv/src/registry_repo.rs | 64 +++++---- crates/weaver_semconv_gen/src/v2.rs | 4 +- src/registry/diff.rs | 8 +- src/registry/live_check.rs | 2 +- src/registry/resolve.rs | 3 +- src/weaver.rs | 3 +- tests/resolution_process.rs | 10 +- 24 files changed, 228 insertions(+), 187 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 5acd7ec84..584219f08 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path) - .unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = + RegistryRepo::try_new(None, ®istry_path).unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index 1c8477d0f..c00c0b070 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -591,7 +591,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![], attribute_groups: vec![], diff 
--git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 44bd70069..ba2f59a0d 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -856,12 +856,12 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); + let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1060,9 +1060,9 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1190,9 +1190,9 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl("https://default/1.0.0".to_owned())); - let repo = RegistryRepo::try_new(schema_url, &path) - .expect("Failed to construct repository"); + let schema_url = 
Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let repo = + RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index b81d4e949..2db318af5 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -448,7 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -560,7 +560,7 @@ mod tests { common: CommonFields::default(), }, }], - } + }, }; let forge_registry = @@ -610,7 +610,7 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), attribute_catalog: vec![], registry: v2::registry::Registry { attributes: vec![], // No attributes - This is the logic bug. 
@@ -639,7 +639,7 @@ mod tests { spans: vec![], metrics: vec![], events: vec![], - } + }, }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_live_check/src/advice/mod.rs b/crates/weaver_live_check/src/advice/mod.rs index 3daf3128a..3fdca2cf5 100644 --- a/crates/weaver_live_check/src/advice/mod.rs +++ b/crates/weaver_live_check/src/advice/mod.rs @@ -161,7 +161,7 @@ mod tests { // Test DeprecatedAdvisor let mut deprecated_advisor = DeprecatedAdvisor; - let deprecated_attr = Rc::new(VersionedAttribute::V1(Attribute { + let deprecated_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "deprecated.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -179,7 +179,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let sample_attr = create_sample_attribute("deprecated.attr"); let sample = Sample::Attribute(sample_attr.clone()); @@ -198,7 +198,7 @@ mod tests { // Test TypeAdvisor let mut type_advisor = TypeAdvisor; - let int_attr = Rc::new(VersionedAttribute::V1(Attribute { + let int_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "int.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Int), @@ -214,7 +214,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let mut sample_attr = create_sample_attribute("int.attr"); sample_attr.r#type = Some(PrimitiveOrArrayTypeSpec::String); @@ -236,7 +236,7 @@ mod tests { // Test StabilityAdvisor let mut stability_advisor = StabilityAdvisor; - let dev_attr = Rc::new(VersionedAttribute::V1(Attribute { + let dev_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { name: "dev.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: 
PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -252,7 +252,7 @@ mod tests { role: None, tags: None, value: None, - })); + }))); let sample_attr = create_sample_attribute("dev.attr"); let sample = Sample::Attribute(sample_attr.clone()); diff --git a/crates/weaver_live_check/src/lib.rs b/crates/weaver_live_check/src/lib.rs index 6ea0018fb..23c8d577a 100644 --- a/crates/weaver_live_check/src/lib.rs +++ b/crates/weaver_live_check/src/lib.rs @@ -118,7 +118,7 @@ pub enum VersionedRegistry { /// v1 ResolvedRegistry V1(ResolvedRegistry), /// v2 ForgeResolvedRegistry - V2(ForgeResolvedRegistry), + V2(Box), } /// Versioned enum for the attribute @@ -126,9 +126,9 @@ pub enum VersionedRegistry { #[serde(untagged)] pub enum VersionedAttribute { /// v1 Attribute - V1(weaver_resolved_schema::attribute::Attribute), + V1(Box), /// v2 Attribute - V2(weaver_forge::v2::attribute::Attribute), + V2(Box), } impl VersionedAttribute { diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index f73b47e00..c7f960da5 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -64,7 +64,7 @@ impl LiveChecker { } } for attribute in &group.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V1(attribute.clone())); + let attribute_rc = Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -92,7 +92,7 @@ impl LiveChecker { let _ = semconv_events.insert(event_name, event_rc); } for attribute in ®istry.registry.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V2(attribute.clone())); + let attribute_rc = Rc::new(VersionedAttribute::V2(Box::new(attribute.clone()))); match &attribute.r#type { AttributeType::Template(_) => { templates_by_length.push((attribute.key.clone(), attribute_rc.clone())); @@ -510,8 +510,8 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if 
use_v2 { - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ V2Attribute { @@ -612,7 +612,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST".to_owned(), @@ -796,8 +796,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -845,7 +845,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST_METRICS".to_owned(), @@ -1004,8 +1004,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1040,7 +1040,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST".to_owned(), @@ -1518,8 +1518,8 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: 
vec![], @@ -1593,7 +1593,7 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { VersionedRegistry::V1(ResolvedRegistry { registry_url: "TEST_EVENTS".to_owned(), diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index ea4fc2617..9850dbd6d 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,7 +58,7 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(®istry)); // Create versioned registry wrapper once for live check - let versioned_registry = Arc::new(VersionedRegistry::V2((*registry).clone())); + let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); Self { search_context, @@ -393,7 +393,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index 5d1d3b6f8..f16385777 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -97,7 +97,7 @@ impl ResolvedTelemetrySchema { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), // TODO: is it correct? 
- schema_url: SchemaUrl(schema_url.as_ref().to_owned()), + schema_url: SchemaUrl::new(schema_url.as_ref().to_owned()), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 23e2cdad4..e4a83d845 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::{RegistryManifest, SchemaUrl}, + manifest::SchemaUrl, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -128,7 +128,7 @@ impl TryFrom for ResolvedTelemetrySchema { schema_url: value.schema_url, attribute_catalog, registry, - refinements + refinements, }) } } @@ -981,7 +981,7 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { registry_url: "http://test/schemas/1.0".to_owned(), @@ -1000,7 +1000,7 @@ mod tests { assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); assert_eq!( v2_schema.schema_url, - SchemaUrl("http://test/schemas/1.0.0".to_owned()) + SchemaUrl::new("http://test/schemas/1.0.0".to_owned()) ); } @@ -1209,7 +1209,7 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0".to_owned()), attribute_catalog: vec![], registry: Registry { attributes: vec![], @@ -1223,7 +1223,7 @@ mod tests { spans: vec![], metrics: vec![], 
events: vec![], - } + }, } } } diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index fda6d54a2..f6b5bb8da 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -19,9 +19,9 @@ use crate::{attribute::AttributeCatalog, Error}; #[derive(Debug, Deserialize)] pub(crate) enum ResolvedDependency { /// A V1 Dependency - V1(V1Schema), + V1(Box), // A V2 Dependency - V2(V2Schema), + V2(Box), } impl ResolvedDependency { @@ -167,7 +167,7 @@ impl ImportableDependency for V2Schema { for ar in m.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -214,7 +214,7 @@ impl ImportableDependency for V2Schema { for ar in e.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -262,7 +262,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -276,7 +276,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. 
let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_name: self.schema_url.name().clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -411,13 +411,13 @@ impl UnresolvedAttributeLookup for Vec { impl From for ResolvedDependency { fn from(value: V1Schema) -> Self { - ResolvedDependency::V1(value) + ResolvedDependency::V1(Box::new(value)) } } impl From for ResolvedDependency { fn from(value: V2Schema) -> Self { - ResolvedDependency::V2(value) + ResolvedDependency::V2(Box::new(value)) } } @@ -445,7 +445,7 @@ mod tests { #[test] fn test_lookup_group_attributes() -> Result<(), Box> { - let d = ResolvedDependency::V1(example_v1_schema()); + let d = ResolvedDependency::V1(Box::new(example_v1_schema())); let result = d.lookup_group_attributes("a"); assert!( result.is_some(), @@ -471,7 +471,7 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: SchemaUrl("http://test/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: vec![weaver_resolved_schema::registry::Group { diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 98952d291..97c3b665f 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -43,7 +43,7 @@ impl SchemaResolver { dependencies, } => Self::resolve_registry(repo, specs, imports, dependencies, include_unreferenced), LoadedSemconvRegistry::Resolved(resolved_telemetry_schema) => { - WResult::Ok(resolved_telemetry_schema) + WResult::Ok(*resolved_telemetry_schema) } LoadedSemconvRegistry::ResolvedV2(_) => { todo!("Converting V2 schema back into V1 is unsupported") @@ -69,7 +69,7 @@ impl SchemaResolver { .push(Self::resolve(d, include_unreferenced).map(|s| s.into())); } 
LoadedSemconvRegistry::Resolved(schema) => { - opt_resolved_dependencies.push(WResult::Ok(schema.into())); + opt_resolved_dependencies.push(WResult::Ok((*schema).into())); } LoadedSemconvRegistry::ResolvedV2(schema) => { opt_resolved_dependencies.push(WResult::Ok(schema.into())); @@ -118,14 +118,14 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: schema_url, + schema_url, registry: resolved_registry, catalog, resource: None, instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! - manifest: manifest, + manifest, } }) } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index 6317a0450..a73843ca3 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -36,7 +36,7 @@ pub enum LoadedSemconvRegistry { dependencies: Vec, }, /// The semconv repository is already resolved and can be used as-is. - Resolved(V1Schema), + Resolved(Box), /// The semconv repository is already resolved and can be used as-is. ResolvedV2(V2Schema), } @@ -48,11 +48,9 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new(None, &path).map_err(|e| { - Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), - } + let repo = RegistryRepo::try_new(None, &path).map_err(|e| Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) @@ -81,8 +79,8 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.0, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.0, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.url, + LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.url, } } @@ -117,8 +115,8 @@ impl LoadedSemconvRegistry { } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().clone()], - LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().clone()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().to_owned()], + LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().to_owned()], } } } @@ -204,7 +202,7 @@ fn load_semconv_repository_recursive( let mut loaded_dependencies = vec![]; let mut non_fatal_errors = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new_dependency(&d) { + match RegistryRepo::try_new_dependency(d) { Ok(d_repo) => { // so we need to make sure the dependency chain only include direct dependencies of each other. 
match load_semconv_repository_recursive( diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index c2278e745..3916363bf 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -846,12 +846,12 @@ pub(crate) fn cleanup_and_stabilize_catalog_and_registry( mod tests { use rand::rng; use rand::seq::SliceRandom; - use weaver_semconv::manifest::SchemaUrl; use std::cmp::Ordering; use std::collections::HashMap; use std::error::Error; use std::fs::OpenOptions; use std::path::PathBuf; + use weaver_semconv::manifest::SchemaUrl; use glob::glob; use serde::Serialize; @@ -919,13 +919,12 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let schema_url = Some(SchemaUrl("https://default/0.1.0".to_owned())); + let schema_url = Some(SchemaUrl::new("https://default/0.1.0".to_owned())); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(schema_url, &location) - .expect("Failed to load registry"), + RegistryRepo::try_new(schema_url, &location).expect("Failed to load registry"), true, ) .ignore(|e| { @@ -1114,7 +1113,7 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let schema_url = Some(SchemaUrl(format!("https://local/registry/1.0.0"))); + let schema_url = Some(SchemaUrl::new("https://local/registry/1.0.0".to_owned())); let repo = RegistryRepo::try_new(schema_url, &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 70a2a1ace..8525af99f 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -544,7 +544,7 @@ mod 
tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index f2a72eed9..65d66903d 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -14,18 +14,43 @@ use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::path::PathBuf; +use std::sync::OnceLock; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; /// Represents the schema URL of a registry, which serves as a unique identifier for the registry /// along with its version. -#[derive(Debug, Clone, PartialEq, Eq, Hash, JsonSchema)] -pub struct SchemaUrl(pub String); +#[derive(Debug, Clone, JsonSchema)] +pub struct SchemaUrl { + /// The schema URL string. + pub url: String, + #[serde(skip)] + #[schemars(skip)] + name: OnceLock, + #[serde(skip)] + #[schemars(skip)] + version: OnceLock, +} impl SchemaUrl { + /// Create a new SchemaUrl from a string. + #[must_use] + pub fn new(url: String) -> Self { + Self { + url, + name: OnceLock::new(), + version: OnceLock::new(), + } + } + + /// Get the URL as a string. + pub fn as_str(&self) -> &str { + &self.url + } + /// Validate the schema URL format. 
pub fn validate(&self) -> Result<(), String> { - let parsed = url::Url::parse(&self.0).map_err(|e| format!("Invalid schema URL: {e}"))?; + let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { return Err("The schema URL must have at least one path segment.".to_owned()); } @@ -33,51 +58,72 @@ impl SchemaUrl { } /// Returns the registry name, derived from the schema URL. - pub fn name(&self) -> String { - let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } + #[must_use] + pub fn name(&self) -> &str { + self.name.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } - if segments.is_empty() { - return parsed_url.authority().to_string(); - } + if segments.is_empty() { + return parsed_url.authority().to_owned(); + } - format!("{}/{}", parsed_url.authority(), segments.join("/")) + format!("{}/{}", parsed_url.authority(), segments.join("/")) + }) } /// Returns the registry version, derived from the schema URL. - pub fn version(&self) -> String { - let parsed_url = url::Url::parse(&self.0).expect("schema_url must be valid"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") - .to_string() + #[must_use] + pub fn version(&self) -> &str { + self.version.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_owned() + }) } /// Create a SchemaUrl from name and version. 
pub fn from_name_version(name: &str, version: &str) -> Result { - let schema_url_str; // TODO: replace with scheme regex - if name.starts_with("http://") || name.starts_with("https://") { - schema_url_str = format!("{}/{}", name.trim_end_matches('/'), version); - } else { - schema_url_str = format!("https://{}/{}", name.trim_end_matches('/'), version); - } - let schema_url = SchemaUrl(schema_url_str); + let schema_url = SchemaUrl::new( + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version) + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version) + }, + ); + schema_url.validate()?; Ok(schema_url) } } +impl PartialEq for SchemaUrl { + fn eq(&self, other: &Self) -> bool { + self.url == other.url + } +} + +impl Eq for SchemaUrl {} + +impl std::hash::Hash for SchemaUrl { + fn hash(&self, state: &mut H) { + self.url.hash(state); + } +} + impl std::fmt::Display for SchemaUrl { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) + write!(f, "{}", self.url) } } @@ -87,7 +133,7 @@ impl<'de> Deserialize<'de> for SchemaUrl { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - Ok(SchemaUrl(s)) + Ok(SchemaUrl::new(s)) } } @@ -96,7 +142,7 @@ impl Serialize for SchemaUrl { where S: Serializer, { - serializer.serialize_str(&self.0) + serializer.serialize_str(&self.url) } } @@ -203,10 +249,10 @@ impl<'de> Deserialize<'de> for Dependency { let schema_url = match (helper.schema_url, helper.name) { (Some(url), _) => url, - (None, Some(name)) => SchemaUrl(format!("{}/unknown", name)), + (None, Some(name)) => SchemaUrl::new(format!("{}/unknown", name)), (None, None) => { return Err(serde::de::Error::custom( - "Either 'schema_url' or 'name' must be provided for a dependency" + "Either 'schema_url' or 'name' must be provided for a dependency", )) } }; @@ -282,13 +328,13 @@ impl RegistryManifest { path: path.clone(), error: "The registry schema 
base URL is required.".to_owned(), }); - } else { - if let Err(e) = url::Url::parse(self.schema_base_url.as_ref().unwrap()) { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: format!("Invalid schema base URL: {}", e), - }); - } + } else if let Err(e) = + url::Url::parse(&self.schema_base_url.clone().unwrap_or_default()) + { + errors.push(InvalidRegistryManifest { + path: path.clone(), + error: format!("Invalid schema base URL: {}", e), + }); } if self @@ -321,23 +367,23 @@ impl RegistryManifest { /// Returns the registry name, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` + #[must_use] pub fn name(&self) -> String { self.schema_url .as_ref() - .map(|url| url.name()) + .map(|url| url.name().to_owned()) .unwrap_or_default() - .to_string() } /// Returns the registry version, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry version would be `1.0.0` + #[must_use] pub fn version(&self) -> String { self.schema_url .as_ref() - .map(|url| url.version()) + .map(|url| url.version().to_owned()) .unwrap_or_default() - .to_string() } } diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 2afbfb507..3fd760d4b 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -5,7 +5,7 @@ use crate::attribute::AttributeSpecWithProvenance; use crate::group::{GroupSpecWithProvenance, ImportsWithProvenance}; use crate::json_schema::JsonSchemaValidator; -use crate::manifest::RegistryManifest; +use crate::manifest::{RegistryManifest, SchemaUrl}; use crate::provenance::Provenance; use crate::registry_repo::RegistryRepo; use crate::semconv::{SemConvSpecV1WithProvenance, SemConvSpecWithProvenance}; @@ -143,22 +143,21 @@ impl SemConvRegistry 
{ } } + let schema_url = SchemaUrl::from_name_version(®istry_repo.name(), &semconv_version) + .map_err(|e| Error::InvalidRegistryManifest { + path: registry_repo.registry_path_repr().into(), + error: e.clone(), + })?; + registry.set_manifest(RegistryManifest { file_format: None, - schema_url: registry_repo - .manifest() - .and_then(|m| Some(m.schema_url.clone())) - .unwrap_or_default(), - schema_base_url: registry_repo - .manifest() - .and_then(|m| m.schema_base_url.clone()), - semconv_version: registry_repo - .manifest() - .and_then(|m| m.semconv_version.clone()), + schema_url: Some(schema_url), description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], resolved_schema_uri: None, stability: crate::stability::Stability::Development, + semconv_version: None, + schema_base_url: None, }); } else { registry.manifest = registry_repo.manifest().cloned(); @@ -394,8 +393,11 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = - RegistryRepo::try_new(Some(SchemaUrl("https://test/42".to_owned())), ®istry_path).unwrap(); + let registry_repo = RegistryRepo::try_new( + Some(SchemaUrl::new("https://test/42".to_owned())), + ®istry_path, + ) + .unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 2a5de63fe..e07628534 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -42,16 +42,14 @@ pub struct RegistryRepo { impl RegistryRepo { /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. 
- pub fn try_new_dependency( - dependency: &Dependency, - ) -> Result { + pub fn try_new_dependency(dependency: &Dependency) -> Result { let path = dependency.registry_path.clone().unwrap_or_else(|| { - // If no registry path is provided, we assume it's the same as the parent registry. - VirtualDirectoryPath::RemoteArchive { - url: dependency.schema_url.to_string(), - sub_folder: None, - } - }); + // If no registry path is provided, we assume it's the same as the parent registry. + VirtualDirectoryPath::RemoteArchive { + url: dependency.schema_url.to_string(), + sub_folder: None, + } + }); Self::try_new(Some(dependency.schema_url.clone()), &path) } @@ -65,9 +63,6 @@ impl RegistryRepo { ) -> Result { let registry = VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; - let mut manifest = None; - let mut registry_name = None; - let mut registry_version = None; // Try to load manifest if let Some(manifest_path) = { // We need a temporary RegistryRepo to call manifest_path @@ -80,20 +75,29 @@ impl RegistryRepo { temp_repo.manifest_path() } { let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; - registry_name = Some(registry_manifest.name()); - registry_version = Some(registry_manifest.version()); - manifest = Some(registry_manifest); + Ok(Self { + name: registry_manifest.name().into(), + version: registry_manifest.version().into(), + registry, + manifest: Some(registry_manifest), + }) } else { - // No manifest, require name and version - registry_name = schema_url.as_ref().map(|url| url.name()).or(Some("unknown".to_owned())); - registry_version = schema_url.as_ref().map(|url| url.version()).or(Some("unknown".to_owned())); + // No manifest + Ok(Self { + name: Arc::from( + schema_url + .as_ref() + .map_or("unknown".to_owned(), |url| url.name().to_owned()), + ), + version: Arc::from( + schema_url + .as_ref() + .map_or("unknown".to_owned(), |url| url.version().to_owned()), + ), + registry, + manifest: None, + }) } - Ok(Self 
{ - name: registry_name.unwrap().into(), - version: registry_version.unwrap().into(), - registry, - manifest, - }) } /// Returns the registry name (from manifest if present, otherwise top-level field). @@ -242,8 +246,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; @@ -264,8 +268,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", @@ -278,8 +282,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path) - .expect("Failed to load test repository."); + let repo = + RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 47ea0369c..586080a7a 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -452,7 +452,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: 
SchemaUrl("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -568,7 +568,7 @@ mod tests { common: CommonFields::default(), }, }], - } + }, } } } diff --git a/src/registry/diff.rs b/src/registry/diff.rs index d013baff9..ab12ac72f 100644 --- a/src/registry/diff.rs +++ b/src/registry/diff.rs @@ -101,12 +101,8 @@ pub(crate) fn command(args: &RegistryDiffArgs) -> Result Result Result WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = - RegistryRepo::try_new(None, registry_path)?; + let main_registry_repo = RegistryRepo::try_new(None, registry_path)?; self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 050472f81..3d7c009d5 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -34,12 +34,10 @@ fn test_cli_interface() { refspec: None, }; - let schema_url = Some(SchemaUrl("https://opelemetry.io/schemas/1.40.0".to_owned())); - let registry_repo = - RegistryRepo::try_new(schema_url, ®istry_path) - .unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + let schema_url = Some(SchemaUrl::new("https://opelemetry.io/schemas/1.40.0".to_owned())); + let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() From 84c81c15ce181f26cd981a51157b39f217962ada Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 13:46:51 -0800 Subject: [PATCH 11/27] cleanup --- 
crates/weaver_resolved_schema/src/error.rs | 13 ++++- crates/weaver_resolved_schema/src/lib.rs | 53 +++++++++++---------- crates/weaver_resolved_schema/src/v2/mod.rs | 29 +++++++---- crates/weaver_resolver/src/dependency.rs | 6 +-- crates/weaver_resolver/src/lib.rs | 5 +- crates/weaver_resolver/src/loader.rs | 4 +- 6 files changed, 66 insertions(+), 44 deletions(-) diff --git a/crates/weaver_resolved_schema/src/error.rs b/crates/weaver_resolved_schema/src/error.rs index 99a1f3750..2986e22e7 100644 --- a/crates/weaver_resolved_schema/src/error.rs +++ b/crates/weaver_resolved_schema/src/error.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::attribute::AttributeRef; -use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound}; +use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound, InvalidSchemaUrl}; /// Errors emitted by this crate. #[derive(thiserror::Error, Debug, Clone, Deserialize, Serialize)] @@ -26,6 +26,16 @@ pub enum Error { group_id: String, }, + /// Cannot convert from V1 to V2 schema due to invalid schema URL. + #[error("Failed to convert from V1 to V2 schema, invalid schema URL: {url}, error: {error}")] + InvalidSchemaUrl { + /// The invalid schema URL. + url: String, + + /// The error message from the URL validation. + error: String, + }, + /// A generic container for multiple errors. #[error("Errors:\n{0:#?}")] CompoundError(Vec), @@ -53,6 +63,7 @@ impl Error { CompoundError(errors) => errors, e @ AttributeNotFound { .. } => vec![e], e @ EventNameNotFound { .. } => vec![e], + e @ InvalidSchemaUrl { .. 
} => vec![e], }) .collect(), ) diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index f16385777..dc798ff14 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::GroupType; -use weaver_semconv::manifest::{RegistryManifest, SchemaUrl}; +use weaver_semconv::manifest::RegistryManifest; use weaver_version::schema_changes::{SchemaChanges, SchemaItemChange, SchemaItemType}; use weaver_version::Versions; @@ -51,8 +51,10 @@ pub(crate) const V2_RESOLVED_FILE_FORMAT: &str = "resolved/2.0.0"; pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, - /// Schema URL that this file is or will be published at. - pub schema_url: SchemaUrl, + /// Schema URL that this file is published at. + pub schema_url: String, + /// The ID of the registry that this schema belongs to. + pub registry_id: String, /// The registry that this schema belongs to. pub registry: Registry, /// Catalog of unique items that are shared across multiple registries @@ -77,8 +79,7 @@ pub struct ResolvedTelemetrySchema { #[serde(skip_serializing_if = "Option::is_none")] pub versions: Option, /// The manifest of the registry. - #[serde(skip)] - pub manifest: Option, + pub registry_manifest: Option, } /// Statistics on a resolved telemetry schema. @@ -93,18 +94,18 @@ pub struct Stats { impl ResolvedTelemetrySchema { /// Create a new resolved telemetry schema. - pub fn new>(schema_url: S, registry_url: S) -> Self { + pub fn new>(schema_url: S, registry_id: S, registry_url: S) -> Self { Self { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - // TODO: is it correct? 
- schema_url: SchemaUrl::new(schema_url.as_ref().to_owned()), + schema_url: schema_url.as_ref().to_owned(), + registry_id: registry_id.as_ref().to_owned(), registry: Registry::new(registry_url), catalog: Catalog::default(), resource: None, instrumentation_library: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, } } @@ -295,13 +296,13 @@ impl ResolvedTelemetrySchema { pub fn diff(&self, baseline_schema: &ResolvedTelemetrySchema) -> SchemaChanges { let mut changes = SchemaChanges::new(); - if let Some(ref manifest) = self.manifest { + if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); } - if let Some(ref manifest) = baseline_schema.manifest { + if let Some(ref manifest) = baseline_schema.registry_manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { semconv_version: manifest.version().clone(), }); @@ -538,7 +539,7 @@ mod tests { #[test] fn no_diff() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "group1", [ @@ -555,7 +556,7 @@ mod tests { #[test] fn detect_2_added_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -564,7 +565,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -583,7 +584,7 @@ mod tests { #[test] fn detect_2_deprecated_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); 
prior_schema.add_attribute_group( "registry.group1", [ @@ -597,7 +598,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -644,7 +645,7 @@ mod tests { #[test] fn detect_2_renamed_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -658,7 +659,7 @@ mod tests { // 2 new attributes are added: attr2_bis and attr3_bis // attr2 is renamed attr2_bis // attr3 is renamed attr3_bis - let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -692,7 +693,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_existing_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -704,7 +705,7 @@ mod tests { ); prior_schema.add_attribute_group("group2", [Attribute::string("attr5", "brief", "note")]); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -731,7 +732,7 @@ mod tests { #[test] fn detect_2_attributes_renamed_to_the_same_new_attribute() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -742,7 +743,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + 
let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -775,7 +776,7 @@ mod tests { /// However, detecting this case is useful for identifying a violation of the process. #[test] fn detect_2_removed_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -786,7 +787,7 @@ mod tests { ], ); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -804,9 +805,9 @@ mod tests { // TODO add many more group diff checks for various capabilities. #[test] fn detect_metric_name_change() { - let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_metric_group("metrics.cpu.time", "cpu.time", [], None); - let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_metric_group( "metrics.cpu.time", "cpu.time", diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index e4a83d845..c3bcf416f 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -123,13 +123,21 @@ impl TryFrom for ResolvedTelemetrySchema { fn try_from(value: crate::ResolvedTelemetrySchema) -> Result { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; - Ok(ResolvedTelemetrySchema { - file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: value.schema_url, - attribute_catalog, - registry, - refinements, - }) + let schema_url = 
SchemaUrl::new(value.schema_url); + + match schema_url.validate() { + Ok(_) => Ok(ResolvedTelemetrySchema { + file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), + schema_url, + attribute_catalog, + registry, + refinements, + }), + Err(e) => Err(crate::error::Error::InvalidSchemaUrl { + url: schema_url.to_string(), + error: e.clone(), + }), + } } } @@ -981,17 +989,18 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), + schema_url: "http://test/schemas/1.0.0".to_owned(), + registry_id: "my-registry".to_owned(), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "http://test/schemas/1.0".to_owned(), + registry_url: "http://test/schemas/1.0.0".to_owned(), groups: vec![], }, instrumentation_library: None, resource: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, }; let v2_schema: Result = v1_schema.try_into(); diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index f6b5bb8da..56f041de8 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -439,7 +439,6 @@ mod tests { use itertools::Itertools; use std::error::Error; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; - use weaver_semconv::manifest::SchemaUrl; use crate::dependency::{ResolvedDependency, UnresolvedAttributeLookup}; @@ -471,7 +470,8 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: SchemaUrl::new("http://test/schemas/1.0.0".to_owned()), + schema_url: "http://test/schemas/1.0.0".to_owned(), + registry_id: "test-registry".to_owned(), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: 
vec![weaver_resolved_schema::registry::Group { @@ -523,7 +523,7 @@ mod tests { instrumentation_library: None, dependencies: vec![], versions: None, - manifest: None, + registry_manifest: None, } } } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 97c3b665f..b4a61142a 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -118,14 +118,15 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url, + schema_url: schema_url.url.clone(), + registry_id: schema_url.name().to_owned(), registry: resolved_registry, catalog, resource: None, instrumentation_library: None, dependencies: vec![], versions: None, // ToDo LQ: Implement this! - manifest, + registry_manifest: manifest, } }) } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index a73843ca3..fd94a17d0 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -79,7 +79,7 @@ impl LoadedSemconvRegistry { match self { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? 
- LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url.url, + LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.url, } } @@ -115,7 +115,7 @@ impl LoadedSemconvRegistry { } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.schema_url.name().to_owned()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.to_owned()], LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().to_owned()], } } From a7735b981226dc35ab561da83ddba7a488ab2e45 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 14:24:34 -0800 Subject: [PATCH 12/27] cleanup --- crates/weaver_emit/src/lib.rs | 2 +- crates/weaver_forge/src/lib.rs | 2 +- crates/weaver_forge/src/v2/registry.rs | 2 +- crates/weaver_live_check/src/live_checker.rs | 5 +- crates/weaver_mcp/src/service.rs | 5 +- crates/weaver_resolved_schema/src/v2/mod.rs | 2 +- crates/weaver_resolver/src/dependency.rs | 2 +- crates/weaver_resolver/src/lib.rs | 2 +- crates/weaver_resolver/src/registry.rs | 2 +- crates/weaver_search/src/lib.rs | 2 +- crates/weaver_semconv/src/lib.rs | 1 + crates/weaver_semconv/src/manifest.rs | 256 +++++++------ crates/weaver_semconv/src/registry.rs | 5 +- crates/weaver_semconv/src/registry_repo.rs | 3 +- crates/weaver_semconv/src/schema_url.rs | 356 +++++++++++++++++++ crates/weaver_semconv_gen/src/v2.rs | 2 +- tests/resolution_process.rs | 6 +- 17 files changed, 507 insertions(+), 148 deletions(-) create mode 100644 crates/weaver_semconv/src/schema_url.rs diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index c00c0b070..ca034ac48 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -268,7 +268,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, Examples, PrimitiveOrArrayTypeSpec, RequirementLevel}, group::{GroupType, InstrumentSpec, SpanKindSpec}, - manifest::SchemaUrl, + schema_url::SchemaUrl, 
stability::Stability, }; diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index ba2f59a0d..f0637853a 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -841,8 +841,8 @@ mod tests { use weaver_common::vdir::VirtualDirectoryPath; use weaver_diff::diff_dir; use weaver_resolver::{LoadedSemconvRegistry, SchemaResolver}; - use weaver_semconv::manifest::SchemaUrl; use weaver_semconv::registry_repo::RegistryRepo; + use weaver_semconv::schema_url::SchemaUrl; use crate::config::{ApplicationMode, CaseConvention, Params, TemplateConfig, WeaverConfig}; use crate::debug::print_dedup_errors; diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 2db318af5..d67117dbe 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -3,7 +3,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog}; -use weaver_semconv::manifest::SchemaUrl; +use weaver_semconv::schema_url::SchemaUrl; use crate::{ error::Error, diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index c7f960da5..77e1547fd 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -64,7 +64,8 @@ impl LiveChecker { } } for attribute in &group.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); + let attribute_rc = + Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -195,7 +196,7 @@ mod tests { YamlValue, }; use weaver_semconv::{ - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 9850dbd6d..06afebb22 100644 --- 
a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,7 +58,8 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(®istry)); // Create versioned registry wrapper once for live check - let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); + let versioned_registry = + Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); Self { search_context, @@ -386,7 +387,7 @@ mod tests { use weaver_search::SearchType; use weaver_semconv::attribute::AttributeType; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index c3bcf416f..20b46b682 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 56f041de8..7c6caee8d 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -20,7 +20,7 @@ use crate::{attribute::AttributeCatalog, Error}; pub(crate) enum ResolvedDependency { /// A V1 Dependency V1(Box), - // A V2 Dependency + /// A V2 Dependency V2(Box), } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index b4a61142a..d980dfed1 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,7 +3,7 @@ #![doc = 
include_str!("../README.md")] use weaver_semconv::group::ImportsWithProvenance; -use weaver_semconv::manifest::SchemaUrl; +use weaver_semconv::schema_url::SchemaUrl; use crate::attribute::AttributeCatalog; use crate::dependency::ResolvedDependency; diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 3916363bf..f5f3620cc 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -851,7 +851,7 @@ mod tests { use std::error::Error; use std::fs::OpenOptions; use std::path::PathBuf; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use glob::glob; use serde::Serialize; diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 8525af99f..3c7154782 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -478,7 +478,7 @@ mod tests { use weaver_semconv::attribute::AttributeType; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::manifest::SchemaUrl; + use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index df0346e0b..7e312cac2 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -21,6 +21,7 @@ pub mod manifest; pub mod provenance; pub mod registry; pub mod registry_repo; +pub mod schema_url; pub mod semconv; pub mod stability; pub mod stats; diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 65d66903d..d8f268368 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -8,144 +8,16 @@ //! In the future, this struct may be extended to include additional information //! 
such as the registry's owner, maintainers, and dependencies. +use crate::schema_url::SchemaUrl; use crate::stability::Stability; use crate::Error; use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Deserializer, Serialize}; use std::path::PathBuf; -use std::sync::OnceLock; use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; -/// Represents the schema URL of a registry, which serves as a unique identifier for the registry -/// along with its version. -#[derive(Debug, Clone, JsonSchema)] -pub struct SchemaUrl { - /// The schema URL string. - pub url: String, - #[serde(skip)] - #[schemars(skip)] - name: OnceLock, - #[serde(skip)] - #[schemars(skip)] - version: OnceLock, -} - -impl SchemaUrl { - /// Create a new SchemaUrl from a string. - #[must_use] - pub fn new(url: String) -> Self { - Self { - url, - name: OnceLock::new(), - version: OnceLock::new(), - } - } - - /// Get the URL as a string. - pub fn as_str(&self) -> &str { - &self.url - } - - /// Validate the schema URL format. - pub fn validate(&self) -> Result<(), String> { - let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; - if parsed.path_segments().map(|c| c.count()).unwrap_or(0) == 0 { - return Err("The schema URL must have at least one path segment.".to_owned()); - } - Ok(()) - } - - /// Returns the registry name, derived from the schema URL. 
- #[must_use] - pub fn name(&self) -> &str { - self.name.get_or_init(|| { - let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); - let path = parsed_url.path().trim_matches('/'); - let mut segments: Vec<&str> = path.split('/').collect(); - if !segments.is_empty() { - _ = segments.pop(); - } - - if segments.is_empty() { - return parsed_url.authority().to_owned(); - } - - format!("{}/{}", parsed_url.authority(), segments.join("/")) - }) - } - - /// Returns the registry version, derived from the schema URL. - #[must_use] - pub fn version(&self) -> &str { - self.version.get_or_init(|| { - let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); - parsed_url - .path() - .trim_matches('/') - .rsplit('/') - .next() - .unwrap_or("") - .to_owned() - }) - } - - /// Create a SchemaUrl from name and version. - pub fn from_name_version(name: &str, version: &str) -> Result { - // TODO: replace with scheme regex - let schema_url = SchemaUrl::new( - if name.starts_with("http://") || name.starts_with("https://") { - format!("{}/{}", name.trim_end_matches('/'), version) - } else { - format!("https://{}/{}", name.trim_end_matches('/'), version) - }, - ); - - schema_url.validate()?; - Ok(schema_url) - } -} - -impl PartialEq for SchemaUrl { - fn eq(&self, other: &Self) -> bool { - self.url == other.url - } -} - -impl Eq for SchemaUrl {} - -impl std::hash::Hash for SchemaUrl { - fn hash(&self, state: &mut H) { - self.url.hash(state); - } -} - -impl std::fmt::Display for SchemaUrl { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.url) - } -} - -impl<'de> Deserialize<'de> for SchemaUrl { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - Ok(SchemaUrl::new(s)) - } -} - -impl Serialize for SchemaUrl { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&self.url) - } -} 
- /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema @@ -443,4 +315,128 @@ mod tests { panic!("Expected an error, but got a result."); } } + + // Dependency tests + #[test] + fn test_dependency_deserialize_with_schema_url() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_none()); + } + + #[test] + fn test_dependency_deserialize_with_registry_path() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +registry_path: "./registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_some()); + } + + #[test] + fn test_dependency_deserialize_with_deprecated_name() { + let yaml = r#" +name: "acme-registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!(dep.schema_url.as_str(), "acme-registry/unknown"); + } + + #[test] + fn test_dependency_deserialize_schema_url_takes_precedence() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +name: "ignored-name" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_dependency_deserialize_missing_both_fields() { + let yaml = r#" +registry_path: "./registry" +"#; + let result: Result = serde_yaml::from_str(yaml); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err + .to_string() + .contains("Either 'schema_url' or 'name' must be provided")); + } + + #[test] + fn 
test_dependency_serialize() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: None, + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // Verify schema_url is serialized + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("https://opentelemetry.io/schemas/1.0.0")); + // Verify name is NOT serialized (skip_serializing) + assert!(!yaml.contains("name:")); + } + + #[test] + fn test_dependency_serialize_with_registry_path() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./registry".to_owned(), + }), + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_serialize_without_optional_path() { + let dep = Dependency { + schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + registry_path: None, + name: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // registry_path should not be serialized when None (skip_serializing_if) + assert!(!yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_roundtrip_serialization() { + let original = Dependency { + schema_url: SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./test/registry".to_owned(), + }), + name: None, + }; + + let yaml = serde_yaml::to_string(&original).expect("Failed to serialize"); + let deserialized: Dependency = serde_yaml::from_str(&yaml).expect("Failed to deserialize"); + + assert_eq!(original.schema_url, deserialized.schema_url); + assert!(deserialized.registry_path.is_some()); + } } diff --git a/crates/weaver_semconv/src/registry.rs 
b/crates/weaver_semconv/src/registry.rs index 3fd760d4b..1fe8ee1c6 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -5,9 +5,10 @@ use crate::attribute::AttributeSpecWithProvenance; use crate::group::{GroupSpecWithProvenance, ImportsWithProvenance}; use crate::json_schema::JsonSchemaValidator; -use crate::manifest::{RegistryManifest, SchemaUrl}; +use crate::manifest::RegistryManifest; use crate::provenance::Provenance; use crate::registry_repo::RegistryRepo; +use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpecV1WithProvenance, SemConvSpecWithProvenance}; use crate::stats::Stats; use crate::Error; @@ -285,10 +286,10 @@ impl SemConvRegistry { mod tests { use crate::attribute::{AttributeSpec, AttributeType, PrimitiveOrArrayTypeSpec}; use crate::group::{GroupSpec, GroupType}; - use crate::manifest::SchemaUrl; use crate::provenance::Provenance; use crate::registry::SemConvRegistry; use crate::registry_repo::RegistryRepo; + use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpec, SemConvSpecV1, SemConvSpecWithProvenance}; use crate::Error; diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index e07628534..3a05732b4 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -6,7 +6,8 @@ use std::default::Default; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::manifest::{Dependency, RegistryManifest, SchemaUrl}; +use crate::manifest::{Dependency, RegistryManifest}; +use crate::schema_url::SchemaUrl; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info, log_warn}; diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs new file mode 100644 index 000000000..2aceb7eab --- /dev/null +++ b/crates/weaver_semconv/src/schema_url.rs @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: 
Apache-2.0 + +//! Schema URL type for uniquely identifying semantic convention registries. + +use schemars::JsonSchema; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::sync::OnceLock; + +/// Represents the schema URL of a registry, which serves as a unique identifier for the registry +/// along with its version. +#[derive(Debug, Clone, JsonSchema)] +pub struct SchemaUrl { + /// The schema URL string. + pub url: String, + #[serde(skip)] + #[schemars(skip)] + name: OnceLock, + #[serde(skip)] + #[schemars(skip)] + version: OnceLock, +} + +impl SchemaUrl { + /// Create a new SchemaUrl from a string. + #[must_use] + pub fn new(url: String) -> Self { + Self { + url, + name: OnceLock::new(), + version: OnceLock::new(), + } + } + + /// Get the URL as a string. + pub fn as_str(&self) -> &str { + &self.url + } + + /// Validate the schema URL format. + pub fn validate(&self) -> Result<(), String> { + let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; + let has_path = parsed + .path_segments() + .map(|segments| segments.filter(|s| !s.is_empty()).count() > 0) + .unwrap_or(false); + + if !has_path { + return Err("The schema URL must have at least one path segment.".to_owned()); + } + Ok(()) + } + + /// Returns the registry name, derived from the schema URL. + #[must_use] + pub fn name(&self) -> &str { + self.name.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + + if segments.is_empty() { + return parsed_url.authority().to_owned(); + } + + format!("{}/{}", parsed_url.authority(), segments.join("/")) + }) + } + + /// Returns the registry version, derived from the schema URL. 
+ #[must_use] + pub fn version(&self) -> &str { + self.version.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_owned() + }) + } + + /// Create a SchemaUrl from name and version. + pub fn from_name_version(name: &str, version: &str) -> Result { + // TODO: replace with scheme regex + let schema_url = SchemaUrl::new( + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version) + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version) + }, + ); + + schema_url.validate()?; + Ok(schema_url) + } +} + +impl PartialEq for SchemaUrl { + fn eq(&self, other: &Self) -> bool { + self.url == other.url + } +} + +impl Eq for SchemaUrl {} + +impl std::hash::Hash for SchemaUrl { + fn hash(&self, state: &mut H) { + self.url.hash(state); + } +} + +impl std::fmt::Display for SchemaUrl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.url) + } +} + +impl<'de> Deserialize<'de> for SchemaUrl { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Ok(SchemaUrl::new(s)) + } +} + +impl Serialize for SchemaUrl { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.url) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_and_as_str() { + let url = "https://opentelemetry.io/schemas/1.0.0"; + let schema_url = SchemaUrl::new(url.to_owned()); + assert_eq!(schema_url.as_str(), url); + } + + #[test] + fn test_validate_valid_url() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert!(schema_url.validate().is_ok()); + } + + #[test] + fn test_validate_invalid_url_syntax() { + let schema_url = SchemaUrl::new("not a valid url".to_owned()); + 
assert!(schema_url.validate().is_err()); + } + + #[test] + fn test_validate_url_without_path() { + let schema_url = SchemaUrl::new("https://opentelemetry.io".to_owned()); + let result = schema_url.validate(); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("at least one path segment")); + } + + #[test] + fn test_name_extraction_simple() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); + } + + #[test] + fn test_name_extraction_nested_path() { + let schema_url = + SchemaUrl::new("https://opentelemetry.io/schemas/sub-component/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); + } + + #[test] + fn test_name_extraction_single_segment() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "opentelemetry.io"); + } + + #[test] + fn test_name_extraction_with_port() { + let schema_url = SchemaUrl::new("https://example.com:8080/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.name(), "example.com:8080/schemas"); + } + + #[test] + fn test_version_extraction_simple() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert_eq!(schema_url.version(), "1.0.0"); + } + + #[test] + fn test_version_extraction_semantic_version() { + let schema_url = SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()); + assert_eq!(schema_url.version(), "1.2.3"); + } + + #[test] + fn test_version_extraction_single_segment() { + let schema_url = SchemaUrl::new("https://example.com/v1".to_owned()); + assert_eq!(schema_url.version(), "v1"); + } + + #[test] + fn test_from_name_version_with_https() { + let result = SchemaUrl::from_name_version("https://opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + 
); + } + + #[test] + fn test_from_name_version_without_scheme() { + let result = SchemaUrl::from_name_version("opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_from_name_version_with_http() { + let result = SchemaUrl::from_name_version("http://example.com/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "http://example.com/schemas/1.0.0"); + } + + #[test] + fn test_from_name_version_with_trailing_slash() { + let result = SchemaUrl::from_name_version("https://example.com/schemas/", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_equality() { + let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url2 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url3 = SchemaUrl::new("https://example.com/schemas/2.0.0".to_owned()); + + assert_eq!(url1, url2); + assert_ne!(url1, url3); + } + + #[test] + fn test_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url2 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + + let mut hasher1 = DefaultHasher::new(); + url1.hash(&mut hasher1); + let hash1 = hasher1.finish(); + + let mut hasher2 = DefaultHasher::new(); + url2.hash(&mut hasher2); + let hash2 = hasher2.finish(); + + assert_eq!(hash1, hash2); + } + + #[test] + fn test_display() { + let schema_url = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + assert_eq!( + format!("{}", schema_url), + "https://example.com/schemas/1.0.0" + ); + } + + #[test] + fn test_serialize() { + let schema_url = 
SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let json = serde_json::to_string(&schema_url).unwrap(); + assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); + } + + #[test] + fn test_deserialize() { + let json = "\"https://example.com/schemas/1.0.0\""; + let schema_url: SchemaUrl = serde_json::from_str(json).unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_serialize_deserialize_roundtrip() { + let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let json = serde_json::to_string(&original).unwrap(); + let deserialized: SchemaUrl = serde_json::from_str(&json).unwrap(); + assert_eq!(original, deserialized); + } + + #[test] + fn test_name_caching() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Call name() twice and verify they return the same reference + let name1 = schema_url.name(); + let name2 = schema_url.name(); + + assert_eq!(name1, name2); + assert_eq!(name1, "opentelemetry.io/schemas"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(name1.as_ptr(), name2.as_ptr()); + } + + #[test] + fn test_version_caching() { + let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Call version() twice and verify they return the same reference + let version1 = schema_url.version(); + let version2 = schema_url.version(); + + assert_eq!(version1, version2); + assert_eq!(version1, "1.0.0"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(version1.as_ptr(), version2.as_ptr()); + } + + #[test] + fn test_clone_preserves_url_but_resets_cache() { + let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + + // Access name to populate cache + let _ = original.name(); + + // Clone should have the same URL but empty cache + let cloned = original.clone(); + assert_eq!(original.as_str(), cloned.as_str()); + 
assert_eq!(original.name(), cloned.name()); + } +} diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 586080a7a..7393b10ea 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -413,7 +413,7 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, PrimitiveOrArrayTypeSpec}, group::InstrumentSpec, - manifest::SchemaUrl, + schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 3d7c009d5..f428c3280 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -6,7 +6,7 @@ use miette::Diagnostic; use weaver_common::vdir::VirtualDirectoryPath; use weaver_resolver::SchemaResolver; -use weaver_semconv::{manifest::SchemaUrl, registry_repo::RegistryRepo}; +use weaver_semconv::{registry_repo::RegistryRepo, schema_url::SchemaUrl}; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -34,7 +34,9 @@ fn test_cli_interface() { refspec: None, }; - let schema_url = Some(SchemaUrl::new("https://opelemetry.io/schemas/1.40.0".to_owned())); + let schema_url = Some(SchemaUrl::new( + "https://opelemetry.io/schemas/1.40.0".to_owned(), + )); let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From 404d05a013aae0213cf95c94b88ac434b57c0187 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 14:56:49 -0800 Subject: [PATCH 13/27] Update crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../circular-registry-test/registry_b/registry_manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index f4d66d2e4..d805ab3e9 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -4,5 +4,5 @@ semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - name: registry_a - # schema_url: is not necessry here, we're using deprecated, but valid foe now `name` + # schema_url: is not necessary here, we're using deprecated, but valid for now `name` registry_path: data/circular-registry-test/registry_a \ No newline at end of file From 29d557b8e0a646adaea280c641888f9685d41124 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 14:57:25 -0800 Subject: [PATCH 14/27] Update crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../registry-test-published-1/registry/registry_manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml index d48e2ab4e..be01236bd 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - schema_url: https://example.com/schemas/1.2.3 + - schema_url: https://opentelemetry.io/schemas/1.0.0 registry_path: data/registry-test-published-1/published From 
90a8b601066d8627944603aa2022e7cca2d2da2f Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 14:58:05 -0800 Subject: [PATCH 15/27] cleanup --- crates/weaver_semconv/src/manifest.rs | 2 +- crates/weaver_semconv/src/registry_repo.rs | 8 +++----- tests/resolution_process.rs | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index d8f268368..af8a4b270 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -74,7 +74,7 @@ pub struct RegistryManifest { pub resolved_schema_uri: Option, } -/// Represents a dependency of a semantic convention registry as defined in YAML. +/// Represents a dependency of a semantic convention registry. #[derive(Serialize, Debug, Clone, JsonSchema)] pub struct Dependency { /// The schema URL for the dependency (required). diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 3a05732b4..13137765a 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -45,7 +45,7 @@ impl RegistryRepo { /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. pub fn try_new_dependency(dependency: &Dependency) -> Result { let path = dependency.registry_path.clone().unwrap_or_else(|| { - // If no registry path is provided, we assume it's the same as the parent registry. + // If no registry path is provided, we assume it's the same schema_url. VirtualDirectoryPath::RemoteArchive { url: dependency.schema_url.to_string(), sub_folder: None, @@ -195,15 +195,13 @@ impl RegistryRepo { } } - /// Returns the registry schema URL, if available in the manifest. + /// Returns the registry schema URL. 
#[must_use] pub fn schema_url(&self) -> SchemaUrl { - // TODO: we should never have a registry without a schema URL at this point - // but not sure how to do it in terms of API design - // but for now we can just panic if we don't find a schema URL self.manifest .as_ref() .and_then(|manifest| manifest.schema_url.clone()) + // we should never have a registry without a schema URL at this point .expect("Schema URL must have been provided") } } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index f428c3280..58bcf8e16 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -35,7 +35,7 @@ fn test_cli_interface() { }; let schema_url = Some(SchemaUrl::new( - "https://opelemetry.io/schemas/1.40.0".to_owned(), + "https://opentelemetry.io/schemas/1.40.0".to_owned(), )); let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); From a47c54bb6728469a764a205bd63b4a836a421523 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 15:01:45 -0800 Subject: [PATCH 16/27] cleanup --- crates/weaver_semconv/src/registry_repo.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index e2e6f9dd5..13137765a 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -45,11 +45,7 @@ impl RegistryRepo { /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. pub fn try_new_dependency(dependency: &Dependency) -> Result { let path = dependency.registry_path.clone().unwrap_or_else(|| { -<<<<<<< HEAD // If no registry path is provided, we assume it's the same schema_url. -======= - // If no registry path is provided, we assume it's the same as the parent registry. 
->>>>>>> 29d557b8e0a646adaea280c641888f9685d41124 VirtualDirectoryPath::RemoteArchive { url: dependency.schema_url.to_string(), sub_folder: None, From c4b60b2999103b3d330748f471a886abb26b0fb0 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 17:55:06 -0800 Subject: [PATCH 17/27] cleanup --- crates/weaver_emit/src/lib.rs | 2 +- crates/weaver_forge/src/lib.rs | 6 +- crates/weaver_forge/src/v2/registry.rs | 4 +- crates/weaver_live_check/src/live_checker.rs | 12 +- crates/weaver_mcp/src/service.rs | 2 +- crates/weaver_resolved_schema/src/v2/mod.rs | 32 ++-- crates/weaver_resolver/src/lib.rs | 9 +- crates/weaver_resolver/src/registry.rs | 5 +- crates/weaver_search/src/lib.rs | 2 +- crates/weaver_semconv/src/manifest.rs | 180 ++++++++----------- crates/weaver_semconv/src/registry.rs | 16 +- crates/weaver_semconv/src/registry_repo.rs | 56 +++--- crates/weaver_semconv/src/schema_url.rs | 114 +++++++----- crates/weaver_semconv_gen/src/v2.rs | 2 +- tests/resolution_process.rs | 5 +- 15 files changed, 218 insertions(+), 229 deletions(-) diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index ca034ac48..e076982b0 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -591,7 +591,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(), registry: Registry { attributes: vec![], attribute_groups: vec![], diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index f0637853a..a6ca74fd4 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -856,7 +856,7 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let schema_url = 
Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); @@ -1060,7 +1060,7 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let schema_url = Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); let repo = RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) @@ -1190,7 +1190,7 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl::new("https://default/1.0.0".to_owned())); + let schema_url = Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); let repo = RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index d67117dbe..4cdc28556 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -448,7 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schema".to_owned()).unwrap(), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -610,7 +610,7 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - 
schema_url: SchemaUrl::new("https://example.com/schema".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schema".to_owned()).unwrap(), attribute_catalog: vec![], registry: v2::registry::Registry { attributes: vec![], // No attributes - This is the logic bug. diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 77e1547fd..d956f8fe5 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -512,7 +512,8 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) + .unwrap(), registry: Registry { attributes: vec![ V2Attribute { @@ -798,7 +799,8 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) + .unwrap(), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -1006,7 +1008,8 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) + .unwrap(), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1520,7 +1523,8 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) + .unwrap(), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], diff 
--git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 06afebb22..4264701b6 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -394,7 +394,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://todo/1.0.0".to_owned()).unwrap(), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 20b46b682..43f1f8f04 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -123,21 +123,21 @@ impl TryFrom for ResolvedTelemetrySchema { fn try_from(value: crate::ResolvedTelemetrySchema) -> Result { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; - let schema_url = SchemaUrl::new(value.schema_url); + let schema_url_str = value.schema_url.clone(); + let schema_url = SchemaUrl::try_new(value.schema_url).map_err(|e| { + crate::error::Error::InvalidSchemaUrl { + url: schema_url_str, + error: e, + } + })?; - match schema_url.validate() { - Ok(_) => Ok(ResolvedTelemetrySchema { - file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url, - attribute_catalog, - registry, - refinements, - }), - Err(e) => Err(crate::error::Error::InvalidSchemaUrl { - url: schema_url.to_string(), - error: e.clone(), - }), - } + Ok(ResolvedTelemetrySchema { + file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), + schema_url, + attribute_catalog, + registry, + refinements, + }) } } @@ -1009,7 +1009,7 @@ mod tests { assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); assert_eq!( v2_schema.schema_url, - SchemaUrl::new("http://test/schemas/1.0.0".to_owned()) + SchemaUrl::try_new("http://test/schemas/1.0.0".to_owned()).unwrap() ); } @@ -1218,7 +1218,7 @@ mod tests 
{ fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl::new("http://test/schemas/1.0".to_owned()), + schema_url: SchemaUrl::try_new("http://test/schemas/1.0".to_owned()).unwrap(), attribute_catalog: vec![], registry: Registry { attributes: vec![], diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index d980dfed1..992f504fc 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -91,14 +91,9 @@ impl SchemaResolver { } let manifest = repo.manifest().cloned(); let schema_url = if let Some(m) = manifest.as_ref() { - match m.schema_url.clone() { - Some(url) => url, - None => { - return WResult::FatalErr(Error::FailToResolveSchemaUrl {}); - } - } + m.schema_url.clone() } else { - match SchemaUrl::from_name_version(&repo.name(), &repo.version()) { + match SchemaUrl::try_from_name_version(&repo.name(), &repo.version()) { Ok(url) => url, Err(_) => return WResult::FatalErr(Error::FailToResolveSchemaUrl {}), } diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index f5f3620cc..682486a03 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -919,7 +919,7 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let schema_url = Some(SchemaUrl::new("https://default/0.1.0".to_owned())); + let schema_url = Some(SchemaUrl::try_new("https://default/0.1.0".to_owned()).unwrap()); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); @@ -1113,7 +1113,8 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let schema_url = Some(SchemaUrl::new("https://local/registry/1.0.0".to_owned())); + let schema_url = + 
Some(SchemaUrl::try_new("https://local/registry/1.0.0".to_owned()).unwrap()); let repo = RegistryRepo::try_new(schema_url, &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 3c7154782..1928c28f2 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -544,7 +544,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index af8a4b270..d0009ab48 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -14,15 +14,13 @@ use crate::Error; use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize}; -use std::path::PathBuf; -use weaver_common::error::handle_errors; use weaver_common::vdir::VirtualDirectoryPath; /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema /// base url. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Debug, Clone, JsonSchema)] pub struct RegistryManifest { /// The file format for this registry. /// @@ -34,8 +32,8 @@ pub struct RegistryManifest { /// This URL is populated before registry is published and is used as /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: /// `http[s]://server[:port]/path/`. 
- /// See https://github.com/open-telemetry/opentelemetry-specification/blob/v1.53.0/specification/schemas/README.md#schema-url for more details. - pub schema_url: Option, + /// See for more details. + pub schema_url: SchemaUrl, /// An optional description of the registry. /// @@ -121,7 +119,8 @@ impl<'de> Deserialize<'de> for Dependency { let schema_url = match (helper.schema_url, helper.name) { (Some(url), _) => url, - (None, Some(name)) => SchemaUrl::new(format!("{}/unknown", name)), + (None, Some(name)) => SchemaUrl::try_from_name_version(&name, "unknown") + .map_err(serde::de::Error::custom)?, (None, None) => { return Err(serde::de::Error::custom( "Either 'schema_url' or 'name' must be provided for a dependency", @@ -137,6 +136,61 @@ impl<'de> Deserialize<'de> for Dependency { } } +impl<'de> Deserialize<'de> for RegistryManifest { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct RegistryManifestHelper { + file_format: Option, + schema_url: Option, + description: Option, + #[allow(deprecated)] + semconv_version: Option, + #[allow(deprecated)] + schema_base_url: Option, + #[serde(default)] + dependencies: Vec, + #[serde(default)] + stability: Stability, + resolved_schema_uri: Option, + } + + let helper = RegistryManifestHelper::deserialize(deserializer)?; + + let schema_url = if let Some(url) = helper.schema_url { + url + } else { + // Fall back to deprecated fields + let base_url = helper.schema_base_url.as_ref().ok_or_else(|| { + serde::de::Error::custom( + "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", + ) + })?; + let version = helper.semconv_version.as_ref().ok_or_else(|| { + serde::de::Error::custom( + "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", + ) + })?; + SchemaUrl::try_from_name_version(base_url, version).map_err(serde::de::Error::custom)? 
+ }; + + Ok(RegistryManifest { + file_format: helper.file_format, + schema_url, + description: helper.description, + #[allow(deprecated)] + semconv_version: helper.semconv_version, + #[allow(deprecated)] + schema_base_url: helper.schema_base_url, + dependencies: helper.dependencies, + stability: helper.stability, + resolved_schema_uri: helper.resolved_schema_uri, + }) + } +} + impl RegistryManifest { /// Attempts to load a registry manifest from a file. /// @@ -155,96 +209,21 @@ impl RegistryManifest { error: e.to_string(), })?; let reader = std::io::BufReader::new(file); - let mut manifest: RegistryManifest = + let manifest: RegistryManifest = serde_yaml::from_reader(reader).map_err(|e| InvalidRegistryManifest { path: manifest_path_buf.clone(), error: e.to_string(), })?; - manifest.validate(manifest_path_buf.clone())?; - - // If the schema URL is not provided, populate it using deprecated schema_base_url and semconv_version - // validation would fail if they were not provided - if manifest.schema_url.is_none() { - manifest.schema_url = Some( - SchemaUrl::from_name_version( - &manifest.schema_base_url.clone().unwrap_or_default(), - &manifest.semconv_version.clone().unwrap_or_default(), - ) - .map_err(|e| InvalidRegistryManifest { - path: manifest_path_buf.clone(), - error: e, - })?, - ); - } - Ok(manifest) } - fn validate(&self, path: PathBuf) -> Result<(), Error> { - let mut errors = vec![]; - - if self.schema_url.is_none() { - if self.schema_base_url.is_none() || self.semconv_version.is_none() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema URL is required.".to_owned(), - }); - } else { - if self - .schema_base_url - .as_ref() - .map_or(true, |url| url.is_empty()) - { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }); - } else if let Err(e) = - url::Url::parse(&self.schema_base_url.clone().unwrap_or_default()) - { - 
errors.push(InvalidRegistryManifest { - path: path.clone(), - error: format!("Invalid schema base URL: {}", e), - }); - } - - if self - .semconv_version - .as_ref() - .map_or(true, |version| version.is_empty()) - { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }); - } - } - } else { - // validate the resolved schema URL: it must be a valid absolute URI with at least one path segment - if let Some(url) = self.schema_url.as_ref() { - url.validate().unwrap_or_else(|e| { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: format!("Invalid schema URL: {}", e), - }); - }); - } - } - - handle_errors(errors)?; - Ok(()) - } - /// Returns the registry name, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` #[must_use] pub fn name(&self) -> String { - self.schema_url - .as_ref() - .map(|url| url.name().to_owned()) - .unwrap_or_default() + self.schema_url.name().to_owned() } /// Returns the registry version, which is derived from the schema URL. 
@@ -252,17 +231,15 @@ impl RegistryManifest { /// the registry version would be `1.0.0` #[must_use] pub fn version(&self) -> String { - self.schema_url - .as_ref() - .map(|url| url.version().to_owned()) - .unwrap_or_default() + self.schema_url.version().to_owned() } } #[cfg(test)] mod tests { + use std::path::PathBuf; + use super::*; - use crate::Error::CompoundError; #[test] fn test_not_found_registry_info() { @@ -298,16 +275,10 @@ mod tests { ); let path = PathBuf::from("tests/test_data/invalid_semconv_registry_manifest.yaml"); - let expected_errs = CompoundError(vec![ - InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }, - InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }, - ]); + let expected_errs = InvalidRegistryManifest { + path: path.clone(), + error: "Registry name and version cannot be empty.".to_owned(), + }; if let Err(observed_errs) = result { assert_eq!(observed_errs, expected_errs); @@ -350,7 +321,7 @@ registry_path: "./registry" name: "acme-registry" "#; let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); - assert_eq!(dep.schema_url.as_str(), "acme-registry/unknown"); + assert_eq!(dep.schema_url.as_str(), "https://acme-registry/unknown"); } #[test] @@ -382,7 +353,8 @@ registry_path: "./registry" #[test] fn test_dependency_serialize() { let dep = Dependency { - schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) + .unwrap(), registry_path: None, name: None, }; @@ -398,7 +370,8 @@ registry_path: "./registry" #[test] fn test_dependency_serialize_with_registry_path() { let dep = Dependency { - schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) + .unwrap(), registry_path: 
Some(VirtualDirectoryPath::LocalFolder { path: "./registry".to_owned(), }), @@ -413,7 +386,8 @@ registry_path: "./registry" #[test] fn test_dependency_serialize_without_optional_path() { let dep = Dependency { - schema_url: SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) + .unwrap(), registry_path: None, name: None, }; @@ -426,7 +400,7 @@ registry_path: "./registry" #[test] fn test_dependency_roundtrip_serialization() { let original = Dependency { - schema_url: SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(), registry_path: Some(VirtualDirectoryPath::LocalFolder { path: "./test/registry".to_owned(), }), diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 1fe8ee1c6..931017f97 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -144,15 +144,17 @@ impl SemConvRegistry { } } - let schema_url = SchemaUrl::from_name_version(®istry_repo.name(), &semconv_version) - .map_err(|e| Error::InvalidRegistryManifest { - path: registry_repo.registry_path_repr().into(), - error: e.clone(), - })?; + let schema_url = + SchemaUrl::try_from_name_version(®istry_repo.name(), &semconv_version).map_err( + |e| Error::InvalidRegistryManifest { + path: registry_repo.registry_path_repr().into(), + error: e.clone(), + }, + )?; registry.set_manifest(RegistryManifest { file_format: None, - schema_url: Some(schema_url), + schema_url, description: registry_repo.manifest().and_then(|m| m.description.clone()), dependencies: vec![], resolved_schema_uri: None, @@ -395,7 +397,7 @@ mod tests { path: "data".to_owned(), }; let registry_repo = RegistryRepo::try_new( - Some(SchemaUrl::new("https://test/42".to_owned())), + Some(SchemaUrl::try_new("https://test/42".to_owned()).unwrap()), ®istry_path, ) .unwrap(); 
diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 13137765a..3b4f41f60 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -26,13 +26,11 @@ pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; /// - Initialized from a Git archive /// - A published repository, which is a manifest file /// that denotes where to find aspects of the registry. -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone)] pub struct RegistryRepo { - // A unique identifier for the registry (e.g. opentelemetry.io/schemas/sub-component). - name: Arc, - - // Registry version - version: Arc, + /// The schema URL associated with the registry + /// May be derived from the manifest or the registry name and version if the manifest is not present. + schema_url: SchemaUrl, // A virtual directory containing the registry. registry: VirtualDirectory, @@ -68,8 +66,7 @@ impl RegistryRepo { if let Some(manifest_path) = { // We need a temporary RegistryRepo to call manifest_path let temp_repo = Self { - name: Arc::from(""), - version: Arc::from(""), + schema_url: SchemaUrl::new_unknown(), registry: registry.clone(), manifest: None, }; @@ -77,24 +74,15 @@ impl RegistryRepo { } { let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; Ok(Self { - name: registry_manifest.name().into(), - version: registry_manifest.version().into(), + schema_url: registry_manifest.schema_url.clone(), registry, manifest: Some(registry_manifest), }) } else { // No manifest + let schema_url_combined = schema_url.unwrap_or_else(SchemaUrl::new_unknown); Ok(Self { - name: Arc::from( - schema_url - .as_ref() - .map_or("unknown".to_owned(), |url| url.name().to_owned()), - ), - version: Arc::from( - schema_url - .as_ref() - .map_or("unknown".to_owned(), |url| url.version().to_owned()), - ), + schema_url: schema_url_combined.clone(), registry, manifest: None, }) @@ -104,21 +92,13 @@ impl RegistryRepo { /// 
Returns the registry name (from manifest if present, otherwise top-level field). #[must_use] pub fn name(&self) -> Arc { - if let Some(manifest) = &self.manifest { - Arc::from(manifest.name()) - } else { - self.name.clone() - } + self.schema_url.name().into() } /// Returns the registry version (from manifest if present, otherwise top-level field). #[must_use] pub fn version(&self) -> Arc { - if let Some(manifest) = &self.manifest { - Arc::from(manifest.version()) - } else { - self.version.clone() - } + self.schema_url.version().into() } /// Returns the local path to the semconv registry. @@ -198,11 +178,17 @@ impl RegistryRepo { /// Returns the registry schema URL. #[must_use] pub fn schema_url(&self) -> SchemaUrl { - self.manifest - .as_ref() - .and_then(|manifest| manifest.schema_url.clone()) - // we should never have a registry without a schema URL at this point - .expect("Schema URL must have been provided") + self.schema_url.clone() + } +} + +impl Default for RegistryRepo { + fn default() -> Self { + Self { + schema_url: SchemaUrl::new_unknown(), + registry: VirtualDirectory::default(), + manifest: None, + } } } diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs index 2aceb7eab..257f74ad3 100644 --- a/crates/weaver_semconv/src/schema_url.rs +++ b/crates/weaver_semconv/src/schema_url.rs @@ -23,7 +23,7 @@ pub struct SchemaUrl { impl SchemaUrl { /// Create a new SchemaUrl from a string. #[must_use] - pub fn new(url: String) -> Self { + fn new(url: String) -> Self { Self { url, name: OnceLock::new(), @@ -31,6 +31,14 @@ impl SchemaUrl { } } + /// Create a new SchemaUrl from a string with validation. + /// Returns an error if the URL is invalid or doesn't have at least one path segment. + pub fn try_new(url: String) -> Result { + let schema_url = Self::new(url); + schema_url.validate()?; + Ok(schema_url) + } + /// Get the URL as a string. 
pub fn as_str(&self) -> &str { &self.url @@ -85,18 +93,24 @@ impl SchemaUrl { } /// Create a SchemaUrl from name and version. - pub fn from_name_version(name: &str, version: &str) -> Result { + pub fn try_from_name_version(name: &str, version: &str) -> Result { + if name.trim().is_empty() || version.trim().is_empty() { + return Err("Registry name and version cannot be empty.".to_owned()); + } // TODO: replace with scheme regex - let schema_url = SchemaUrl::new( + SchemaUrl::try_new( if name.starts_with("http://") || name.starts_with("https://") { format!("{}/{}", name.trim_end_matches('/'), version) } else { format!("https://{}/{}", name.trim_end_matches('/'), version) }, - ); + ) + } - schema_url.validate()?; - Ok(schema_url) + /// Returns a default unknown schema URL. + #[must_use] + pub fn new_unknown() -> Self { + Self::new("https://unknown/unknown".to_owned()) } } @@ -146,76 +160,84 @@ mod tests { #[test] fn test_new_and_as_str() { let url = "https://opentelemetry.io/schemas/1.0.0"; - let schema_url = SchemaUrl::new(url.to_owned()); + let schema_url = SchemaUrl::try_new(url.to_owned()).unwrap(); assert_eq!(schema_url.as_str(), url); } - #[test] - fn test_validate_valid_url() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); - assert!(schema_url.validate().is_ok()); - } - #[test] fn test_validate_invalid_url_syntax() { - let schema_url = SchemaUrl::new("not a valid url".to_owned()); - assert!(schema_url.validate().is_err()); + assert!(SchemaUrl::try_new("not a valid url".to_owned()).is_err()); } #[test] fn test_validate_url_without_path() { - let schema_url = SchemaUrl::new("https://opentelemetry.io".to_owned()); - let result = schema_url.validate(); + let result = SchemaUrl::try_new("https://opentelemetry.io".to_owned()); assert!(result.is_err()); assert!(result.unwrap_err().contains("at least one path segment")); } + #[test] + fn test_try_new_valid_url() { + let result = 
SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + #[test] fn test_name_extraction_simple() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); } #[test] fn test_name_extraction_nested_path() { let schema_url = - SchemaUrl::new("https://opentelemetry.io/schemas/sub-component/1.0.0".to_owned()); + SchemaUrl::try_new("https://opentelemetry.io/schemas/sub-component/1.0.0".to_owned()) + .unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); } #[test] fn test_name_extraction_single_segment() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/1.0.0".to_owned()); + let schema_url = SchemaUrl::try_new("https://opentelemetry.io/1.0.0".to_owned()).unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io"); } #[test] fn test_name_extraction_with_port() { - let schema_url = SchemaUrl::new("https://example.com:8080/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://example.com:8080/schemas/1.0.0".to_owned()).unwrap(); assert_eq!(schema_url.name(), "example.com:8080/schemas"); } #[test] fn test_version_extraction_simple() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); assert_eq!(schema_url.version(), "1.0.0"); } #[test] fn test_version_extraction_semantic_version() { - let schema_url = SchemaUrl::new("https://example.com/schemas/1.2.3".to_owned()); + let schema_url = + SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(); assert_eq!(schema_url.version(), "1.2.3"); } #[test] fn 
test_version_extraction_single_segment() { - let schema_url = SchemaUrl::new("https://example.com/v1".to_owned()); + let schema_url = SchemaUrl::try_new("https://example.com/v1".to_owned()).unwrap(); assert_eq!(schema_url.version(), "v1"); } #[test] - fn test_from_name_version_with_https() { - let result = SchemaUrl::from_name_version("https://opentelemetry.io/schemas", "1.0.0"); + fn test_try_from_name_version_with_https() { + let result = SchemaUrl::try_from_name_version("https://opentelemetry.io/schemas", "1.0.0"); assert!(result.is_ok()); let schema_url = result.unwrap(); assert_eq!( @@ -225,8 +247,8 @@ mod tests { } #[test] - fn test_from_name_version_without_scheme() { - let result = SchemaUrl::from_name_version("opentelemetry.io/schemas", "1.0.0"); + fn test_try_from_name_version_without_scheme() { + let result = SchemaUrl::try_from_name_version("opentelemetry.io/schemas", "1.0.0"); assert!(result.is_ok()); let schema_url = result.unwrap(); assert_eq!( @@ -236,16 +258,16 @@ mod tests { } #[test] - fn test_from_name_version_with_http() { - let result = SchemaUrl::from_name_version("http://example.com/schemas", "1.0.0"); + fn test_try_from_name_version_with_http() { + let result = SchemaUrl::try_from_name_version("http://example.com/schemas", "1.0.0"); assert!(result.is_ok()); let schema_url = result.unwrap(); assert_eq!(schema_url.as_str(), "http://example.com/schemas/1.0.0"); } #[test] - fn test_from_name_version_with_trailing_slash() { - let result = SchemaUrl::from_name_version("https://example.com/schemas/", "1.0.0"); + fn test_try_from_name_version_with_trailing_slash() { + let result = SchemaUrl::try_from_name_version("https://example.com/schemas/", "1.0.0"); assert!(result.is_ok()); let schema_url = result.unwrap(); assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); @@ -253,9 +275,9 @@ mod tests { #[test] fn test_equality() { - let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); - let url2 = 
SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); - let url3 = SchemaUrl::new("https://example.com/schemas/2.0.0".to_owned()); + let url1 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let url2 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let url3 = SchemaUrl::try_new("https://example.com/schemas/2.0.0".to_owned()).unwrap(); assert_eq!(url1, url2); assert_ne!(url1, url3); @@ -266,8 +288,8 @@ mod tests { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; - let url1 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); - let url2 = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let url1 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let url2 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); let mut hasher1 = DefaultHasher::new(); url1.hash(&mut hasher1); @@ -282,7 +304,8 @@ mod tests { #[test] fn test_display() { - let schema_url = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); assert_eq!( format!("{}", schema_url), "https://example.com/schemas/1.0.0" @@ -291,7 +314,8 @@ mod tests { #[test] fn test_serialize() { - let schema_url = SchemaUrl::new("https://example.com/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); let json = serde_json::to_string(&schema_url).unwrap(); assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); } @@ -305,7 +329,8 @@ mod tests { #[test] fn test_serialize_deserialize_roundtrip() { - let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let original = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); let json = serde_json::to_string(&original).unwrap(); let deserialized: SchemaUrl = 
serde_json::from_str(&json).unwrap(); assert_eq!(original, deserialized); @@ -313,7 +338,8 @@ mod tests { #[test] fn test_name_caching() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); // Call name() twice and verify they return the same reference let name1 = schema_url.name(); @@ -328,7 +354,8 @@ mod tests { #[test] fn test_version_caching() { - let schema_url = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let schema_url = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); // Call version() twice and verify they return the same reference let version1 = schema_url.version(); @@ -343,7 +370,8 @@ mod tests { #[test] fn test_clone_preserves_url_but_resets_cache() { - let original = SchemaUrl::new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let original = + SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); // Access name to populate cache let _ = original.name(); diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 7393b10ea..98232e14b 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -452,7 +452,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: SchemaUrl::new("https://todo/1.0.0".to_owned()), + schema_url: SchemaUrl::try_new("https://todo/1.0.0".to_owned()).unwrap(), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 58bcf8e16..15b3e5400 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -34,9 +34,8 @@ fn test_cli_interface() { refspec: None, }; - let schema_url 
= Some(SchemaUrl::new( - "https://opentelemetry.io/schemas/1.40.0".to_owned(), - )); + let schema_url = + Some(SchemaUrl::try_new("https://opentelemetry.io/schemas/1.40.0".to_owned()).unwrap()); let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From 9eba7c694a233f583b6bc067cc8cd15e52973ef7 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 18:03:58 -0800 Subject: [PATCH 18/27] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 688aae404..4b7f8b3b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ All notable changes to this project will be documented in this file. - New feature ([#1153](https://github.com/open-telemetry/weaver/issues/1153)) - Live-check now has a `/health` endpoint that can be used in long-running scenarios to confirm readiness and liveness of the live-check server. ([#1193](https://github.com/open-telemetry/weaver/pull/1193) by @jerbly) - New feature ([#1100](https://github.com/open-telemetry/weaver/issues/1100)) - Set `--output=http` to have live-check send its report as the response to `/stop`. ([#1193](https://github.com/open-telemetry/weaver/pull/1193) by @jerbly) +- Use `schema_url` in registry manifest, dependencies, and resolved schema instead of `registry_url`. Parse registry name and version + from it. ([#1202](https://github.com/open-telemetry/weaver/pull/1202) by @lmolkova) +- Default to `manifest.yaml` for registry manifest file, deprecate `registry_manifest.yaml` and add warning when it's used. 
([#1202](https://github.com/open-telemetry/weaver/pull/1202) by @lmolkova) # [0.21.2] - 2026-02-03 From ed455602bb1d353f8549abb30d54641507bdcfc0 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Sat, 14 Feb 2026 18:18:44 -0800 Subject: [PATCH 19/27] review --- crates/weaver_semconv/src/schema_url.rs | 31 ++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs index 257f74ad3..20f25f19f 100644 --- a/crates/weaver_semconv/src/schema_url.rs +++ b/crates/weaver_semconv/src/schema_url.rs @@ -69,11 +69,18 @@ impl SchemaUrl { _ = segments.pop(); } + // Construct authority from host and port (replaces deprecated authority() method) + let authority = match (parsed_url.host_str(), parsed_url.port()) { + (Some(host), Some(port)) => format!("{}:{}", host, port), + (Some(host), None) => host.to_owned(), + _ => String::new(), + }; + if segments.is_empty() { - return parsed_url.authority().to_owned(); + return authority; } - format!("{}/{}", parsed_url.authority(), segments.join("/")) + format!("{}/{}", authority, segments.join("/")) }) } @@ -140,7 +147,7 @@ impl<'de> Deserialize<'de> for SchemaUrl { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - Ok(SchemaUrl::new(s)) + SchemaUrl::try_new(s).map_err(serde::de::Error::custom) } } @@ -327,6 +334,24 @@ mod tests { assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); } + #[test] + fn test_deserialize_invalid_url() { + let json = "\"not a valid url\""; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("Invalid schema URL")); + } + + #[test] + fn test_deserialize_url_without_path() { + let json = "\"https://example.com\""; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("at least one path 
segment")); + } + #[test] fn test_serialize_deserialize_roundtrip() { let original = From 4f6d5443fac82d0ba051260c25afe31f49481a26 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 16:20:56 -0800 Subject: [PATCH 20/27] try_into --- crates/weaver_emit/src/lib.rs | 3 +- crates/weaver_forge/src/lib.rs | 8 +- crates/weaver_forge/src/v2/registry.rs | 4 +- crates/weaver_live_check/src/live_checker.rs | 13 +-- crates/weaver_mcp/src/service.rs | 3 +- crates/weaver_resolved_schema/src/lib.rs | 4 +- crates/weaver_resolved_schema/src/v2/mod.rs | 8 +- crates/weaver_resolver/src/registry.rs | 9 +- crates/weaver_search/src/lib.rs | 3 +- crates/weaver_semconv/src/manifest.rs | 19 ++-- crates/weaver_semconv/src/registry.rs | 3 +- crates/weaver_semconv/src/schema_url.rs | 110 ++++++++++--------- crates/weaver_semconv_gen/src/v2.rs | 3 +- tests/resolution_process.rs | 4 +- 14 files changed, 95 insertions(+), 99 deletions(-) diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index e076982b0..e16d854fa 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -268,7 +268,6 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, Examples, PrimitiveOrArrayTypeSpec, RequirementLevel}, group::{GroupType, InstrumentSpec, SpanKindSpec}, - schema_url::SchemaUrl, stability::Stability, }; @@ -591,7 +590,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(), + schema_url: "https://example.com/schemas/1.2.3".try_into().unwrap(), registry: Registry { attributes: vec![], attribute_groups: vec![], diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index a6ca74fd4..dc68dad70 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -856,7 +856,7 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let 
schema_url = Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); + let schema_url: Option = Some("https://default/1.0.0".try_into().expect("Should be valid schema url")); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); @@ -1060,7 +1060,7 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); + let schema_url: Option = Some("https://default/1.0.0".try_into().expect("Should be valid schema url")); let repo = RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) @@ -1190,9 +1190,9 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let schema_url = Some(SchemaUrl::try_new("https://default/1.0.0".to_owned()).unwrap()); + let schema_url: SchemaUrl = "https://default/1.0.0".try_into().expect("Should be valid schema url"); let repo = - RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); + RegistryRepo::try_new(Some(schema_url), &path).expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 4cdc28556..51a64caab 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -448,7 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl::try_new("https://example.com/schema".to_owned()).unwrap(), + schema_url: "https://example.com/schema".try_into().unwrap(), 
attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -610,7 +610,7 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: SchemaUrl::try_new("https://example.com/schema".to_owned()).unwrap(), + schema_url: "https://example.com/schema".try_into().unwrap(), attribute_catalog: vec![], registry: v2::registry::Registry { attributes: vec![], // No attributes - This is the logic bug. diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index d956f8fe5..8e040aa7d 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -196,7 +196,6 @@ mod tests { YamlValue, }; use weaver_semconv::{ - schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; @@ -512,8 +511,7 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) - .unwrap(), + schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), registry: Registry { attributes: vec![ V2Attribute { @@ -799,8 +797,7 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) - .unwrap(), + schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -1008,8 +1005,7 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) - .unwrap(), + schema_url: 
"https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1523,8 +1519,7 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()) - .unwrap(), + schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 4264701b6..346c81bcd 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -387,14 +387,13 @@ mod tests { use weaver_search::SearchType; use weaver_semconv::attribute::AttributeType; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://todo/1.0.0".to_owned()).unwrap(), + schema_url: "https://todo/1.0.0".try_into().unwrap(), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index dc798ff14..9f6441c5e 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -298,13 +298,13 @@ impl ResolvedTelemetrySchema { if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version().clone(), + semconv_version: manifest.version().to_owned(), }); } if let Some(ref manifest) = baseline_schema.registry_manifest { 
changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version().clone(), + semconv_version: manifest.version().to_owned(), }); } diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 43f1f8f04..d7358dc89 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -124,7 +124,7 @@ impl TryFrom for ResolvedTelemetrySchema { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; let schema_url_str = value.schema_url.clone(); - let schema_url = SchemaUrl::try_new(value.schema_url).map_err(|e| { + let schema_url: SchemaUrl = value.schema_url.try_into().map_err(|e| { crate::error::Error::InvalidSchemaUrl { url: schema_url_str, error: e, @@ -993,7 +993,7 @@ mod tests { registry_id: "my-registry".to_owned(), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "http://test/schemas/1.0.0".to_owned(), + registry_url: "http://another/url/1.0".to_owned(), groups: vec![], }, instrumentation_library: None, @@ -1009,7 +1009,7 @@ mod tests { assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); assert_eq!( v2_schema.schema_url, - SchemaUrl::try_new("http://test/schemas/1.0.0".to_owned()).unwrap() + "http://test/schemas/1.0.0".try_into().unwrap() ); } @@ -1218,7 +1218,7 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: SchemaUrl::try_new("http://test/schemas/1.0".to_owned()).unwrap(), + schema_url: "http://test/schemas/1.0".try_into().expect("Should be valid schema url"), attribute_catalog: vec![], registry: Registry { attributes: vec![], diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 682486a03..89ef434e6 100644 --- a/crates/weaver_resolver/src/registry.rs +++ 
b/crates/weaver_resolver/src/registry.rs @@ -919,12 +919,12 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let schema_url = Some(SchemaUrl::try_new("https://default/0.1.0".to_owned()).unwrap()); + let schema_url: SchemaUrl = "https://default/0.1.0".try_into().expect("Should be valid schema url"); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(schema_url, &location).expect("Failed to load registry"), + RegistryRepo::try_new(Some(schema_url), &location).expect("Failed to load registry"), true, ) .ignore(|e| { @@ -1113,9 +1113,8 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let schema_url = - Some(SchemaUrl::try_new("https://local/registry/1.0.0".to_owned()).unwrap()); - let repo = RegistryRepo::try_new(schema_url, &path)?; + let schema_url: SchemaUrl = "https://local/registry/1.0.0".try_into().expect("Should be valid schema url"); + let repo = RegistryRepo::try_new(Some(schema_url), &path)?; let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 1928c28f2..a4b945365 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -478,7 +478,6 @@ mod tests { use weaver_semconv::attribute::AttributeType; use weaver_semconv::deprecated::Deprecated; use weaver_semconv::group::{InstrumentSpec, SpanKindSpec}; - use weaver_semconv::schema_url::SchemaUrl; use weaver_semconv::stability::Stability; use weaver_semconv::v2::span::SpanName; use weaver_semconv::v2::CommonFields; @@ -544,7 +543,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { 
ForgeResolvedRegistry { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(), + schema_url: "https://example.com/schemas/1.2.3".try_into().unwrap(), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index d0009ab48..87abf492d 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -222,16 +222,16 @@ impl RegistryManifest { /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry name would be `opentelemetry.io/schemas/sub-component` #[must_use] - pub fn name(&self) -> String { - self.schema_url.name().to_owned() + pub fn name(&self) -> &str { + self.schema_url.name() } /// Returns the registry version, which is derived from the schema URL. /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, /// the registry version would be `1.0.0` #[must_use] - pub fn version(&self) -> String { - self.schema_url.version().to_owned() + pub fn version(&self) -> &str { + self.schema_url.version() } } @@ -353,8 +353,7 @@ registry_path: "./registry" #[test] fn test_dependency_serialize() { let dep = Dependency { - schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) - .unwrap(), + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), registry_path: None, name: None, }; @@ -370,8 +369,7 @@ registry_path: "./registry" #[test] fn test_dependency_serialize_with_registry_path() { let dep = Dependency { - schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) - .unwrap(), + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), registry_path: Some(VirtualDirectoryPath::LocalFolder { path: "./registry".to_owned(), }), @@ -386,8 +384,7 @@ registry_path: "./registry" #[test] 
fn test_dependency_serialize_without_optional_path() { let dep = Dependency { - schema_url: SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()) - .unwrap(), + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), registry_path: None, name: None, }; @@ -400,7 +397,7 @@ registry_path: "./registry" #[test] fn test_dependency_roundtrip_serialization() { let original = Dependency { - schema_url: SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(), + schema_url: "https://example.com/schemas/1.0.0".try_into().unwrap(), registry_path: Some(VirtualDirectoryPath::LocalFolder { path: "./test/registry".to_owned(), }), diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 931017f97..42da8ff5a 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -291,7 +291,6 @@ mod tests { use crate::provenance::Provenance; use crate::registry::SemConvRegistry; use crate::registry_repo::RegistryRepo; - use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpec, SemConvSpecV1, SemConvSpecWithProvenance}; use crate::Error; @@ -397,7 +396,7 @@ mod tests { path: "data".to_owned(), }; let registry_repo = RegistryRepo::try_new( - Some(SchemaUrl::try_new("https://test/42".to_owned()).unwrap()), + Some("https://test/42".try_into().expect("Should be valid schema url")), ®istry_path, ) .unwrap(); diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs index 20f25f19f..6083f2339 100644 --- a/crates/weaver_semconv/src/schema_url.rs +++ b/crates/weaver_semconv/src/schema_url.rs @@ -31,14 +31,6 @@ impl SchemaUrl { } } - /// Create a new SchemaUrl from a string with validation. - /// Returns an error if the URL is invalid or doesn't have at least one path segment. 
- pub fn try_new(url: String) -> Result { - let schema_url = Self::new(url); - schema_url.validate()?; - Ok(schema_url) - } - /// Get the URL as a string. pub fn as_str(&self) -> &str { &self.url @@ -105,13 +97,12 @@ impl SchemaUrl { return Err("Registry name and version cannot be empty.".to_owned()); } // TODO: replace with scheme regex - SchemaUrl::try_new( - if name.starts_with("http://") || name.starts_with("https://") { - format!("{}/{}", name.trim_end_matches('/'), version) - } else { - format!("https://{}/{}", name.trim_end_matches('/'), version) - }, - ) + + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version).try_into() + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version).try_into() + } } /// Returns a default unknown schema URL. @@ -147,7 +138,8 @@ impl<'de> Deserialize<'de> for SchemaUrl { D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; - SchemaUrl::try_new(s).map_err(serde::de::Error::custom) + let schema: SchemaUrl = s.try_into().map_err(serde::de::Error::custom)?; + Ok(schema) } } @@ -160,6 +152,26 @@ impl Serialize for SchemaUrl { } } +impl TryFrom<&str> for SchemaUrl { + type Error = String; + + fn try_from(value: &str) -> Result { + let schema_url = Self::new(value.to_owned()); + schema_url.validate()?; + Ok(schema_url) + } +} + +impl TryFrom for SchemaUrl { + type Error = String; + + fn try_from(value: String) -> Result { + let schema_url = Self::new(value); + schema_url.validate()?; + Ok(schema_url) + } +} + #[cfg(test)] mod tests { use super::*; @@ -167,25 +179,26 @@ mod tests { #[test] fn test_new_and_as_str() { let url = "https://opentelemetry.io/schemas/1.0.0"; - let schema_url = SchemaUrl::try_new(url.to_owned()).unwrap(); + let schema_url: SchemaUrl = url.try_into().unwrap(); assert_eq!(schema_url.as_str(), url); } #[test] fn test_validate_invalid_url_syntax() { - assert!(SchemaUrl::try_new("not a valid url".to_owned()).is_err()); + 
let result: Result = "not a valid url".try_into(); + assert!(result.is_err()); } #[test] fn test_validate_url_without_path() { - let result = SchemaUrl::try_new("https://opentelemetry.io".to_owned()); + let result = TryInto::::try_into("https://opentelemetry.io"); assert!(result.is_err()); assert!(result.unwrap_err().contains("at least one path segment")); } #[test] fn test_try_new_valid_url() { - let result = SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()); + let result = TryInto::::try_into("https://opentelemetry.io/schemas/1.0.0"); assert!(result.is_ok()); let schema_url = result.unwrap(); assert_eq!( @@ -196,49 +209,46 @@ mod tests { #[test] fn test_name_extraction_simple() { - let schema_url = - SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = TryInto::::try_into("https://opentelemetry.io/schemas/1.0.0").unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); } #[test] fn test_name_extraction_nested_path() { - let schema_url = - SchemaUrl::try_new("https://opentelemetry.io/schemas/sub-component/1.0.0".to_owned()) - .unwrap(); + let schema_url: SchemaUrl = TryInto::::try_into("https://opentelemetry.io/schemas/sub-component/1.0.0").unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); } #[test] fn test_name_extraction_single_segment() { - let schema_url = SchemaUrl::try_new("https://opentelemetry.io/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = "https://opentelemetry.io/1.0.0".try_into().unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io"); } #[test] fn test_name_extraction_with_port() { - let schema_url = - SchemaUrl::try_new("https://example.com:8080/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://example.com:8080/schemas/1.0.0".try_into().unwrap(); assert_eq!(schema_url.name(), "example.com:8080/schemas"); } #[test] fn test_version_extraction_simple() { - let schema_url = - 
SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); assert_eq!(schema_url.version(), "1.0.0"); } #[test] fn test_version_extraction_semantic_version() { - let schema_url = - SchemaUrl::try_new("https://example.com/schemas/1.2.3".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://example.com/schemas/1.2.3".try_into().unwrap(); assert_eq!(schema_url.version(), "1.2.3"); } #[test] fn test_version_extraction_single_segment() { - let schema_url = SchemaUrl::try_new("https://example.com/v1".to_owned()).unwrap(); + let schema_url: SchemaUrl = "https://example.com/v1".try_into().unwrap(); assert_eq!(schema_url.version(), "v1"); } @@ -282,9 +292,9 @@ mod tests { #[test] fn test_equality() { - let url1 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); - let url2 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); - let url3 = SchemaUrl::try_new("https://example.com/schemas/2.0.0".to_owned()).unwrap(); + let url1: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url2: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url3: SchemaUrl = "https://example.com/schemas/2.0.0".try_into().unwrap(); assert_eq!(url1, url2); assert_ne!(url1, url3); @@ -295,8 +305,8 @@ mod tests { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; - let url1 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); - let url2 = SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let url1: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url2: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); let mut hasher1 = DefaultHasher::new(); url1.hash(&mut hasher1); @@ -311,8 +321,8 @@ mod tests { #[test] fn test_display() { - let schema_url = - 
SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://example.com/schemas/1.0.0".try_into().unwrap(); assert_eq!( format!("{}", schema_url), "https://example.com/schemas/1.0.0" @@ -321,8 +331,8 @@ mod tests { #[test] fn test_serialize() { - let schema_url = - SchemaUrl::try_new("https://example.com/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://example.com/schemas/1.0.0".try_into().unwrap(); let json = serde_json::to_string(&schema_url).unwrap(); assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); } @@ -354,8 +364,8 @@ mod tests { #[test] fn test_serialize_deserialize_roundtrip() { - let original = - SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let original: SchemaUrl = + "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); let json = serde_json::to_string(&original).unwrap(); let deserialized: SchemaUrl = serde_json::from_str(&json).unwrap(); assert_eq!(original, deserialized); @@ -363,8 +373,8 @@ mod tests { #[test] fn test_name_caching() { - let schema_url = - SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Call name() twice and verify they return the same reference let name1 = schema_url.name(); @@ -379,8 +389,8 @@ mod tests { #[test] fn test_version_caching() { - let schema_url = - SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let schema_url: SchemaUrl = + "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Call version() twice and verify they return the same reference let version1 = schema_url.version(); @@ -395,8 +405,8 @@ mod tests { #[test] fn test_clone_preserves_url_but_resets_cache() { - let original = - SchemaUrl::try_new("https://opentelemetry.io/schemas/1.0.0".to_owned()).unwrap(); + let original: SchemaUrl = + 
"https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Access name to populate cache let _ = original.name(); diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 98232e14b..2e805ba5d 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -413,7 +413,6 @@ mod tests { use weaver_semconv::{ attribute::{AttributeType, PrimitiveOrArrayTypeSpec}, group::InstrumentSpec, - schema_url::SchemaUrl, v2::{span::SpanName, CommonFields}, }; @@ -452,7 +451,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: "resolved/2.0.0".to_owned(), - schema_url: SchemaUrl::try_new("https://todo/1.0.0".to_owned()).unwrap(), + schema_url: "https://todo/1.0.0".try_into().unwrap(), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 15b3e5400..78e345662 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -34,8 +34,8 @@ fn test_cli_interface() { refspec: None, }; - let schema_url = - Some(SchemaUrl::try_new("https://opentelemetry.io/schemas/1.40.0".to_owned()).unwrap()); + let schema_url: Option = + Some("https://opentelemetry.io/schemas/1.40.0".try_into().unwrap()); let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); From 46884530215ca61f0959bde6e99081aadea8e044 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 19:45:02 -0800 Subject: [PATCH 21/27] warning/error in the future, remove deprecated properties from the API --- crates/weaver_codegen_test/build.rs | 4 +- crates/weaver_forge/src/lib.rs | 28 ++++-- crates/weaver_live_check/src/live_checker.rs | 4 +- crates/weaver_resolved_schema/src/v2/mod.rs | 18 ++-- crates/weaver_resolver/src/lib.rs | 6 +- 
crates/weaver_resolver/src/loader.rs | 22 ++-- crates/weaver_resolver/src/registry.rs | 22 +++- crates/weaver_semconv/src/lib.rs | 18 ++++ crates/weaver_semconv/src/manifest.rs | 100 +++++++++++-------- crates/weaver_semconv/src/registry.rs | 18 ++-- crates/weaver_semconv/src/registry_repo.rs | 35 ++++--- crates/weaver_semconv/src/schema_url.rs | 34 +++---- crates/weaver_semconv_gen/src/v1.rs | 2 +- src/registry/check.rs | 2 +- src/registry/diff.rs | 4 +- src/registry/resolve.rs | 8 +- src/weaver.rs | 6 +- tests/resolution_process.rs | 14 ++- 18 files changed, 211 insertions(+), 134 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 584219f08..3fb772353 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = - RegistryRepo::try_new(None, ®istry_path).unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![]) + .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index dc68dad70..5c1a56aa4 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -856,12 +856,16 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let schema_url: Option = Some("https://default/1.0.0".try_into().expect("Should be valid schema url")); + let schema_url: Option = Some( + "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"), + ); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual 
directory path string"); - let repo = - RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); + let repo = RegistryRepo::try_new(schema_url, &path, &mut vec![]) + .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1060,9 +1064,13 @@ mod tests { let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let schema_url: Option = Some("https://default/1.0.0".try_into().expect("Should be valid schema url")); - let repo = - RegistryRepo::try_new(schema_url, &path).expect("Failed to construct repository"); + let schema_url: Option = Some( + "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"), + ); + let repo = RegistryRepo::try_new(schema_url, &path, &mut vec![]) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1190,9 +1198,11 @@ mod tests { let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let schema_url: SchemaUrl = "https://default/1.0.0".try_into().expect("Should be valid schema url"); - let repo = - RegistryRepo::try_new(Some(schema_url), &path).expect("Failed to construct repository"); + let schema_url: SchemaUrl = "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"); + let repo = RegistryRepo::try_new(Some(schema_url), &path, &mut vec![]) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 8e040aa7d..d5afe1e60 
100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -186,6 +186,7 @@ mod tests { span::{Span as V2Span, SpanAttribute}, }; use weaver_resolved_schema::attribute::Attribute; + use weaver_semconv::v2::{span::SpanName, CommonFields}; use weaver_semconv::{ attribute::{ AttributeType, BasicRequirementLevelSpec, EnumEntriesSpec, Examples, @@ -195,9 +196,6 @@ mod tests { stability::Stability, YamlValue, }; - use weaver_semconv::{ - v2::{span::SpanName, CommonFields}, - }; fn get_all_advice(sample: &mut Sample) -> &mut [PolicyFinding] { match sample { diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index d7358dc89..143fa2fef 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -124,12 +124,14 @@ impl TryFrom for ResolvedTelemetrySchema { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; let schema_url_str = value.schema_url.clone(); - let schema_url: SchemaUrl = value.schema_url.try_into().map_err(|e| { - crate::error::Error::InvalidSchemaUrl { - url: schema_url_str, - error: e, - } - })?; + let schema_url: SchemaUrl = + value + .schema_url + .try_into() + .map_err(|e| crate::error::Error::InvalidSchemaUrl { + url: schema_url_str, + error: e, + })?; Ok(ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), @@ -1218,7 +1220,9 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "http://test/schemas/1.0".try_into().expect("Should be valid schema url"), + schema_url: "http://test/schemas/1.0" + .try_into() + .expect("Should be valid schema url"), attribute_catalog: vec![], registry: Registry { attributes: vec![], diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 992f504fc..fb79cbd7e 100644 --- 
a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -51,7 +51,7 @@ impl SchemaResolver { } } - // Actually resolves a defiinition registry. + // Actually resolves a definition registry. fn resolve_registry( repo: RegistryRepo, specs: Vec, @@ -243,7 +243,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -257,7 +257,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index fd94a17d0..f3bc04341 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -48,10 +48,11 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new(None, &path).map_err(|e| Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), - })?; + let repo = + RegistryRepo::try_new(None, &path, &mut vec![]).map_err(|e| Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), + })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) 
.into_result_failing_non_fatal() @@ -200,10 +201,13 @@ fn load_semconv_repository_recursive( } // Load dependencies. let mut loaded_dependencies = vec![]; - let mut non_fatal_errors = vec![]; + let mut non_fatal_errors: Vec = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new_dependency(d) { + let mut semconv_nfes: Vec = vec![]; + match RegistryRepo::try_new_dependency(d, &mut semconv_nfes) { Ok(d_repo) => { + non_fatal_errors + .extend(semconv_nfes.into_iter().map(Error::FailToResolveDefinition)); // so we need to make sure the dependency chain only include direct dependencies of each other. match load_semconv_repository_recursive( d_repo, @@ -398,7 +402,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -440,7 +444,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -475,7 +479,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let result = load_semconv_repository(registry_repo, true); match result { diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 
89ef434e6..b2073eb67 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -919,12 +919,15 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let schema_url: SchemaUrl = "https://default/0.1.0".try_into().expect("Should be valid schema url"); + let schema_url: SchemaUrl = "https://default/0.1.0" + .try_into() + .expect("Should be valid schema url"); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(Some(schema_url), &location).expect("Failed to load registry"), + RegistryRepo::try_new(Some(schema_url), &location, &mut vec![]) + .expect("Failed to load registry"), true, ) .ignore(|e| { @@ -951,6 +954,14 @@ mod tests { ) ) }) + .ignore(|e| { + matches!( + e, + crate::Error::FailToResolveDefinition( + weaver_semconv::Error::LegacyRegistryManifest { path: _ } + ) + ) + }) .into_result_failing_non_fatal() .expect("Failed to load semconv specs"); @@ -1113,8 +1124,11 @@ groups: path: "data/registry-test-7-spans/registry".to_owned(), }; - let schema_url: SchemaUrl = "https://local/registry/1.0.0".try_into().expect("Should be valid schema url"); - let repo = RegistryRepo::try_new(Some(schema_url), &path)?; + let schema_url: SchemaUrl = "https://local/registry/1.0.0" + .try_into() + .expect("Should be valid schema url"); + let repo = RegistryRepo::try_new(Some(schema_url), &path, &mut vec![]) + .expect("Failed to load registry"); let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index 7e312cac2..dcff8f418 100644 --- a/crates/weaver_semconv/src/lib.rs +++ 
b/crates/weaver_semconv/src/lib.rs @@ -321,6 +321,24 @@ pub enum Error { error: String, }, + /// This error is raised when a registry manifest is using a legacy file name. + #[diagnostic(severity(Warning))] + #[error("The registry manifest at {path:?} is using a legacy file name. Please rename it to `manifest.yaml`.")] + LegacyRegistryManifest { + /// The path to the registry manifest file. + path: PathBuf, + }, + + /// This error is raised when a registry manifest includes deprecated properties. + #[error("The syntax used in the registry manifest at {path:?} is deprecated. {error}")] + #[diagnostic(severity(Warning))] + DeprecatedSyntaxInRegistryManifest { + /// The path to the registry manifest file. + path: PathBuf, + /// The error that occurred. + error: String, + }, + /// A container for multiple errors. #[error("{:?}", format_errors(.0))] CompoundError(#[related] Vec), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 87abf492d..c17e1ada8 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -8,10 +8,13 @@ //! In the future, this struct may be extended to include additional information //! such as the registry's owner, maintainers, and dependencies. +use std::vec; + +use crate::registry_repo::LEGACY_REGISTRY_MANIFEST; use crate::schema_url::SchemaUrl; use crate::stability::Stability; use crate::Error; -use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; +use crate::Error::{InvalidRegistryManifest, LegacyRegistryManifest, RegistryManifestNotFound, DeprecatedSyntaxInRegistryManifest}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize}; use weaver_common::vdir::VirtualDirectoryPath; @@ -43,20 +46,6 @@ pub struct RegistryManifest { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, - /// The version of the registry which will be used to define the semconv package version. 
- #[serde(default, skip_serializing)] - #[deprecated( - note = "The `version` field is deprecated. The registry version should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." - )] - pub semconv_version: Option, - - /// The base URL where the registry's schema files are hosted. - #[serde(default, skip_serializing)] - #[deprecated( - note = "The `schema_base_url` field is deprecated. The registry schema URL should be specified in the `schema_url` field, which is required and serves as a unique identifier for the registry." - )] - pub schema_base_url: Option, - /// List of the registry's dependencies. /// Note: In the current phase, we only support zero or one dependency. /// See this GH issue for more details: @@ -70,6 +59,9 @@ pub struct RegistryManifest { /// The location of the resolved telemetry schema, if available. #[serde(skip_serializing_if = "Option::is_none")] pub resolved_schema_uri: Option, + + #[serde(skip)] + deserialization_warnings: Vec, } /// Represents a dependency of a semantic convention registry. @@ -91,16 +83,6 @@ pub struct Dependency { /// - A directory containing the raw definition. #[serde(skip_serializing_if = "Option::is_none")] pub registry_path: Option, - - /// This field is deprecated and should not be used. - /// The registry name should be derived from the `schema_url` field, - /// which serves as a unique identifier for the dependency registry - /// and includes registry version. - #[deprecated( - note = "The `name` field is deprecated. The registry name should be derived from the `schema_url` field, which serves as a unique identifier for the dependency registry." 
- )] - #[serde(default, skip_serializing)] // we can read, but won't write this field - pub name: Option, } impl<'de> Deserialize<'de> for Dependency { @@ -131,7 +113,6 @@ impl<'de> Deserialize<'de> for Dependency { Ok(Dependency { schema_url, registry_path: helper.registry_path, - name: None, }) } } @@ -158,6 +139,7 @@ impl<'de> Deserialize<'de> for RegistryManifest { } let helper = RegistryManifestHelper::deserialize(deserializer)?; + let mut warnings = vec![]; let schema_url = if let Some(url) = helper.schema_url { url @@ -168,11 +150,14 @@ impl<'de> Deserialize<'de> for RegistryManifest { "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", ) })?; + let version = helper.semconv_version.as_ref().ok_or_else(|| { serde::de::Error::custom( "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", ) })?; + + warnings.push("The 'semconv_version' and 'schema_base_url' fields are deprecated in favor of 'schema_url'.".to_owned()); SchemaUrl::try_from_name_version(base_url, version).map_err(serde::de::Error::custom)? }; @@ -180,13 +165,10 @@ impl<'de> Deserialize<'de> for RegistryManifest { file_format: helper.file_format, schema_url, description: helper.description, - #[allow(deprecated)] - semconv_version: helper.semconv_version, - #[allow(deprecated)] - schema_base_url: helper.schema_base_url, dependencies: helper.dependencies, stability: helper.stability, resolved_schema_uri: helper.resolved_schema_uri, + deserialization_warnings: warnings, }) } } @@ -195,7 +177,10 @@ impl RegistryManifest { /// Attempts to load a registry manifest from a file. /// /// The expected file format is YAML. 
- pub fn try_from_file>(path: P) -> Result { + pub fn try_from_file>( + path: P, + nfes: &mut Vec, + ) -> Result { let manifest_path_buf = path.as_ref().to_path_buf(); if !manifest_path_buf.exists() { @@ -215,6 +200,28 @@ impl RegistryManifest { error: e.to_string(), })?; + // Check if this is a legacy manifest file + let is_legacy = if let Some(file_name) = manifest_path_buf.file_name() { + file_name == LEGACY_REGISTRY_MANIFEST + } else { + false + }; + + if is_legacy { + nfes.push(LegacyRegistryManifest { + path: manifest_path_buf.clone(), + }); + } + + nfes.extend( + manifest + .deserialization_warnings + .iter() + .map(|w| DeprecatedSyntaxInRegistryManifest { + path: manifest_path_buf.clone(), + error: w.clone(), + }), + ); Ok(manifest) } @@ -233,6 +240,20 @@ impl RegistryManifest { pub fn version(&self) -> &str { self.schema_url.version() } + + /// Creates a new `RegistryManifest` from a schema URL with default values. + #[must_use] + pub fn from_schema_url(schema_url: SchemaUrl) -> Self { + Self { + file_format: None, + schema_url, + description: None, + dependencies: vec![], + resolved_schema_uri: None, + stability: Stability::Development, + deserialization_warnings: vec![], + } + } } #[cfg(test)] @@ -243,7 +264,8 @@ mod tests { #[test] fn test_not_found_registry_info() { - let result = RegistryManifest::try_from_file("tests/test_data/missing_registry.yaml"); + let result = + RegistryManifest::try_from_file("tests/test_data/missing_registry.yaml", &mut vec![]); assert!( matches!(result, Err(RegistryManifestNotFound { path, .. }) if path.ends_with("missing_registry.yaml")) ); @@ -253,6 +275,7 @@ mod tests { fn test_incomplete_registry_info() { let result = RegistryManifest::try_from_file( "tests/test_data/incomplete_semconv_registry_manifest.yaml", + &mut vec![], ); assert!( matches!(result, Err(InvalidRegistryManifest { path, .. 
}) if path.ends_with("incomplete_semconv_registry_manifest.yaml")) @@ -261,9 +284,11 @@ mod tests { #[test] fn test_valid_registry_info() { - let config = - RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") - .expect("Failed to load the registry configuration file."); + let config = RegistryManifest::try_from_file( + "tests/test_data/valid_semconv_registry_manifest.yaml", + &mut vec![], + ) + .expect("Failed to load the registry configuration file."); assert_eq!(config.name(), "acme.com/schemas"); assert_eq!(config.version(), "0.1.0"); } @@ -272,6 +297,7 @@ mod tests { fn test_invalid_registry_info() { let result = RegistryManifest::try_from_file( "tests/test_data/invalid_semconv_registry_manifest.yaml", + &mut vec![], ); let path = PathBuf::from("tests/test_data/invalid_semconv_registry_manifest.yaml"); @@ -355,7 +381,6 @@ registry_path: "./registry" let dep = Dependency { schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), registry_path: None, - name: None, }; let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); @@ -373,7 +398,6 @@ registry_path: "./registry" registry_path: Some(VirtualDirectoryPath::LocalFolder { path: "./registry".to_owned(), }), - name: None, }; let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); @@ -386,7 +410,6 @@ registry_path: "./registry" let dep = Dependency { schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), registry_path: None, - name: None, }; let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); @@ -401,7 +424,6 @@ registry_path: "./registry" registry_path: Some(VirtualDirectoryPath::LocalFolder { path: "./test/registry".to_owned(), }), - name: None, }; let yaml = serde_yaml::to_string(&original).expect("Failed to serialize"); diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 42da8ff5a..d20415971 100644 --- a/crates/weaver_semconv/src/registry.rs +++ 
b/crates/weaver_semconv/src/registry.rs @@ -152,16 +152,7 @@ impl SemConvRegistry { }, )?; - registry.set_manifest(RegistryManifest { - file_format: None, - schema_url, - description: registry_repo.manifest().and_then(|m| m.description.clone()), - dependencies: vec![], - resolved_schema_uri: None, - stability: crate::stability::Stability::Development, - semconv_version: None, - schema_base_url: None, - }); + registry.set_manifest(RegistryManifest::from_schema_url(schema_url)); } else { registry.manifest = registry_repo.manifest().cloned(); } @@ -396,8 +387,13 @@ mod tests { path: "data".to_owned(), }; let registry_repo = RegistryRepo::try_new( - Some("https://test/42".try_into().expect("Should be valid schema url")), + Some( + "https://test/42" + .try_into() + .expect("Should be valid schema url"), + ), ®istry_path, + &mut vec![], ) .unwrap(); let registry = SemConvRegistry::from_semconv_specs(®istry_repo, semconv_specs).unwrap(); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 3b4f41f60..631d69f77 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -10,7 +10,7 @@ use crate::manifest::{Dependency, RegistryManifest}; use crate::schema_url::SchemaUrl; use crate::Error; use weaver_common::vdir::{VirtualDirectory, VirtualDirectoryPath}; -use weaver_common::{get_path_type, log_info, log_warn}; +use weaver_common::{get_path_type, log_info}; /// The name of the legacy registry manifest file. #[deprecated(note = "The registry manifest file is renamed to `manifest.yaml`.")] @@ -41,7 +41,10 @@ pub struct RegistryRepo { impl RegistryRepo { /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. 
- pub fn try_new_dependency(dependency: &Dependency) -> Result { + pub fn try_new_dependency( + dependency: &Dependency, + nfes: &mut Vec, + ) -> Result { let path = dependency.registry_path.clone().unwrap_or_else(|| { // If no registry path is provided, we assume it's the same schema_url. VirtualDirectoryPath::RemoteArchive { @@ -49,7 +52,7 @@ impl RegistryRepo { sub_folder: None, } }); - Self::try_new(Some(dependency.schema_url.clone()), &path) + Self::try_new(Some(dependency.schema_url.clone()), &path, nfes) } /// Creates a new `RegistryRepo` from a schema URL and `RegistryPath` object that @@ -59,6 +62,7 @@ impl RegistryRepo { pub fn try_new( schema_url: Option, registry_path: &VirtualDirectoryPath, + nfes: &mut Vec, ) -> Result { let registry = VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; @@ -72,7 +76,7 @@ impl RegistryRepo { }; temp_repo.manifest_path() } { - let registry_manifest = RegistryManifest::try_from_file(manifest_path)?; + let registry_manifest = RegistryManifest::try_from_file(manifest_path, nfes)?; Ok(Self { schema_url: registry_manifest.schema_url.clone(), registry, @@ -160,10 +164,9 @@ impl RegistryRepo { )); Some(manifest_path) } else if legacy_path.exists() { - log_warn(format!( - "Found registry manifest: {}. 
Please rename file to {}, as the old name is deprecated and won't be supported in future versions.", - legacy_path.display(), - REGISTRY_MANIFEST + log_info(format!( + "Found registry manifest: {}", + legacy_path.display() )); Some(legacy_path) } else { @@ -212,7 +215,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new(None, ®istry_path).unwrap(); + let repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![]).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -231,8 +234,10 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); + + let repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![]) + .expect("Failed to load test repository."); + let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; @@ -253,8 +258,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![]) + .expect("Failed to load test repository."); let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", @@ -267,8 +272,8 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new(None, ®istry_path).expect("Failed to load test repository."); + let repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![]) + .expect("Failed to load test repository."); let Some(resolved_path) = 
repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs index 6083f2339..fab09f2aa 100644 --- a/crates/weaver_semconv/src/schema_url.rs +++ b/crates/weaver_semconv/src/schema_url.rs @@ -209,13 +209,16 @@ mod tests { #[test] fn test_name_extraction_simple() { - let schema_url: SchemaUrl = TryInto::::try_into("https://opentelemetry.io/schemas/1.0.0").unwrap(); + let schema_url: SchemaUrl = + TryInto::::try_into("https://opentelemetry.io/schemas/1.0.0").unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); } #[test] fn test_name_extraction_nested_path() { - let schema_url: SchemaUrl = TryInto::::try_into("https://opentelemetry.io/schemas/sub-component/1.0.0").unwrap(); + let schema_url: SchemaUrl = + TryInto::::try_into("https://opentelemetry.io/schemas/sub-component/1.0.0") + .unwrap(); assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); } @@ -227,22 +230,19 @@ mod tests { #[test] fn test_name_extraction_with_port() { - let schema_url: SchemaUrl = - "https://example.com:8080/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://example.com:8080/schemas/1.0.0".try_into().unwrap(); assert_eq!(schema_url.name(), "example.com:8080/schemas"); } #[test] fn test_version_extraction_simple() { - let schema_url: SchemaUrl = - "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); assert_eq!(schema_url.version(), "1.0.0"); } #[test] fn test_version_extraction_semantic_version() { - let schema_url: SchemaUrl = - "https://example.com/schemas/1.2.3".try_into().unwrap(); + let schema_url: SchemaUrl = "https://example.com/schemas/1.2.3".try_into().unwrap(); assert_eq!(schema_url.version(), "1.2.3"); } @@ -321,8 +321,7 @@ mod tests { #[test] fn test_display() { - let schema_url: SchemaUrl = - 
"https://example.com/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); assert_eq!( format!("{}", schema_url), "https://example.com/schemas/1.0.0" @@ -331,8 +330,7 @@ mod tests { #[test] fn test_serialize() { - let schema_url: SchemaUrl = - "https://example.com/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); let json = serde_json::to_string(&schema_url).unwrap(); assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); } @@ -364,8 +362,7 @@ mod tests { #[test] fn test_serialize_deserialize_roundtrip() { - let original: SchemaUrl = - "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let original: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); let json = serde_json::to_string(&original).unwrap(); let deserialized: SchemaUrl = serde_json::from_str(&json).unwrap(); assert_eq!(original, deserialized); @@ -373,8 +370,7 @@ mod tests { #[test] fn test_name_caching() { - let schema_url: SchemaUrl = - "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Call name() twice and verify they return the same reference let name1 = schema_url.name(); @@ -389,8 +385,7 @@ mod tests { #[test] fn test_version_caching() { - let schema_url: SchemaUrl = - "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Call version() twice and verify they return the same reference let version1 = schema_url.version(); @@ -405,8 +400,7 @@ mod tests { #[test] fn test_clone_preserves_url_but_resets_cache() { - let original: SchemaUrl = - "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let original: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); // Access name to populate 
cache let _ = original.name(); diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 4ce1e7943..5904a4761 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -198,7 +198,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new(None, ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let generator = SnippetGenerator::try_from_registry_repo( ®istry_repo, template, diff --git a/src/registry/check.rs b/src/registry/check.rs index 6e525798f..ac93a7950 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,9 +43,9 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = RegistryRepo::try_new(None, registry_path)?; + let mut nfes = vec![]; + let main_registry_repo = RegistryRepo::try_new(None, registry_path, &mut nfes)?; + + diag_msgs.extend_from_vec(nfes.into_iter().map(DiagnosticMessage::new).collect()); + self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 78e345662..1827108a1 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -34,11 +34,15 @@ fn test_cli_interface() { refspec: None, }; - let schema_url: Option = - Some("https://opentelemetry.io/schemas/1.40.0".try_into().unwrap()); - let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path).unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + let schema_url: Option = Some( + "https://opentelemetry.io/schemas/1.40.0" + .try_into() + .unwrap(), + ); + let registry_repo = RegistryRepo::try_new(schema_url, ®istry_path, &mut vec![]) + .unwrap_or_else(|e| { + panic!("Failed to create the registry 
repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() From 587c0c971db36000e6b7b34616e487fcf82d57d1 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 19:51:05 -0800 Subject: [PATCH 22/27] unbox, private url --- crates/weaver_resolver/src/lib.rs | 6 +++--- crates/weaver_resolver/src/loader.rs | 4 ++-- crates/weaver_semconv/src/schema_url.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index fb79cbd7e..c09d27689 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -43,7 +43,7 @@ impl SchemaResolver { dependencies, } => Self::resolve_registry(repo, specs, imports, dependencies, include_unreferenced), LoadedSemconvRegistry::Resolved(resolved_telemetry_schema) => { - WResult::Ok(*resolved_telemetry_schema) + WResult::Ok(resolved_telemetry_schema) } LoadedSemconvRegistry::ResolvedV2(_) => { todo!("Converting V2 schema back into V1 is unsupported") @@ -69,7 +69,7 @@ impl SchemaResolver { .push(Self::resolve(d, include_unreferenced).map(|s| s.into())); } LoadedSemconvRegistry::Resolved(schema) => { - opt_resolved_dependencies.push(WResult::Ok((*schema).into())); + opt_resolved_dependencies.push(WResult::Ok(schema.into())); } LoadedSemconvRegistry::ResolvedV2(schema) => { opt_resolved_dependencies.push(WResult::Ok(schema.into())); @@ -113,7 +113,7 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: schema_url.url.clone(), + schema_url: schema_url.as_str().to_owned(), registry_id: schema_url.name().to_owned(), registry: resolved_registry, catalog, diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index f3bc04341..d0fdb1435 100644 --- a/crates/weaver_resolver/src/loader.rs +++ 
b/crates/weaver_resolver/src/loader.rs @@ -36,7 +36,7 @@ pub enum LoadedSemconvRegistry { dependencies: Vec, }, /// The semconv repository is already resolved and can be used as-is. - Resolved(Box), + Resolved(V1Schema), /// The semconv repository is already resolved and can be used as-is. ResolvedV2(V2Schema), } @@ -81,7 +81,7 @@ impl LoadedSemconvRegistry { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url.url, + LoadedSemconvRegistry::ResolvedV2(schema) => schema.schema_url.as_str(), } } diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs index fab09f2aa..c5ce38910 100644 --- a/crates/weaver_semconv/src/schema_url.rs +++ b/crates/weaver_semconv/src/schema_url.rs @@ -11,7 +11,7 @@ use std::sync::OnceLock; #[derive(Debug, Clone, JsonSchema)] pub struct SchemaUrl { /// The schema URL string. 
- pub url: String, + url: String, #[serde(skip)] #[schemars(skip)] name: OnceLock, From 1428955ef084b14c8f96aa819e9568660f16bbac Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 19:56:10 -0800 Subject: [PATCH 23/27] remove some boxing --- crates/weaver_live_check/src/advice/mod.rs | 12 ++++++------ crates/weaver_live_check/src/lib.rs | 4 ++-- crates/weaver_live_check/src/live_checker.rs | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/weaver_live_check/src/advice/mod.rs b/crates/weaver_live_check/src/advice/mod.rs index 3fdca2cf5..3daf3128a 100644 --- a/crates/weaver_live_check/src/advice/mod.rs +++ b/crates/weaver_live_check/src/advice/mod.rs @@ -161,7 +161,7 @@ mod tests { // Test DeprecatedAdvisor let mut deprecated_advisor = DeprecatedAdvisor; - let deprecated_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { + let deprecated_attr = Rc::new(VersionedAttribute::V1(Attribute { name: "deprecated.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -179,7 +179,7 @@ mod tests { role: None, tags: None, value: None, - }))); + })); let sample_attr = create_sample_attribute("deprecated.attr"); let sample = Sample::Attribute(sample_attr.clone()); @@ -198,7 +198,7 @@ mod tests { // Test TypeAdvisor let mut type_advisor = TypeAdvisor; - let int_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { + let int_attr = Rc::new(VersionedAttribute::V1(Attribute { name: "int.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::Int), @@ -214,7 +214,7 @@ mod tests { role: None, tags: None, value: None, - }))); + })); let mut sample_attr = create_sample_attribute("int.attr"); sample_attr.r#type = Some(PrimitiveOrArrayTypeSpec::String); @@ -236,7 +236,7 @@ mod tests { // Test StabilityAdvisor let mut 
stability_advisor = StabilityAdvisor; - let dev_attr = Rc::new(VersionedAttribute::V1(Box::new(Attribute { + let dev_attr = Rc::new(VersionedAttribute::V1(Attribute { name: "dev.attr".to_owned(), requirement_level: RequirementLevel::Basic(BasicRequirementLevelSpec::Required), r#type: PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -252,7 +252,7 @@ mod tests { role: None, tags: None, value: None, - }))); + })); let sample_attr = create_sample_attribute("dev.attr"); let sample = Sample::Attribute(sample_attr.clone()); diff --git a/crates/weaver_live_check/src/lib.rs b/crates/weaver_live_check/src/lib.rs index 23c8d577a..724cffa27 100644 --- a/crates/weaver_live_check/src/lib.rs +++ b/crates/weaver_live_check/src/lib.rs @@ -126,9 +126,9 @@ pub enum VersionedRegistry { #[serde(untagged)] pub enum VersionedAttribute { /// v1 Attribute - V1(Box), + V1(weaver_resolved_schema::attribute::Attribute), /// v2 Attribute - V2(Box), + V2(weaver_forge::v2::attribute::Attribute), } impl VersionedAttribute { diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index d5afe1e60..82cd7bc17 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -65,7 +65,7 @@ impl LiveChecker { } for attribute in &group.attributes { let attribute_rc = - Rc::new(VersionedAttribute::V1(Box::new(attribute.clone()))); + Rc::new(VersionedAttribute::V1(attribute.clone())); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -93,7 +93,7 @@ impl LiveChecker { let _ = semconv_events.insert(event_name, event_rc); } for attribute in ®istry.registry.attributes { - let attribute_rc = Rc::new(VersionedAttribute::V2(Box::new(attribute.clone()))); + let attribute_rc = Rc::new(VersionedAttribute::V2(attribute.clone())); match &attribute.r#type { AttributeType::Template(_) => { templates_by_length.push((attribute.key.clone(), attribute_rc.clone())); From 
2ec64fa4e04de6b7dc5c55f4bc02cd026f02c0ba Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 20:07:34 -0800 Subject: [PATCH 24/27] up --- crates/weaver_live_check/src/live_checker.rs | 19 +++++++++++++------ crates/weaver_semconv/src/manifest.rs | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 82cd7bc17..baccfd4b6 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -64,8 +64,7 @@ impl LiveChecker { } } for attribute in &group.attributes { - let attribute_rc = - Rc::new(VersionedAttribute::V1(attribute.clone())); + let attribute_rc = Rc::new(VersionedAttribute::V1(attribute.clone())); match attribute.r#type { AttributeType::Template(_) => { templates_by_length @@ -509,7 +508,9 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![ V2Attribute { @@ -795,7 +796,9 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -1003,7 +1006,9 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { 
attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1517,7 +1522,9 @@ mod tests { }; VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { - schema_url: "https://example.com/schemas/1.2.3".try_into().expect("Should be valid schema url"), + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index c17e1ada8..1e7ee1c24 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -14,7 +14,10 @@ use crate::registry_repo::LEGACY_REGISTRY_MANIFEST; use crate::schema_url::SchemaUrl; use crate::stability::Stability; use crate::Error; -use crate::Error::{InvalidRegistryManifest, LegacyRegistryManifest, RegistryManifestNotFound, DeprecatedSyntaxInRegistryManifest}; +use crate::Error::{ + DeprecatedSyntaxInRegistryManifest, InvalidRegistryManifest, LegacyRegistryManifest, + RegistryManifestNotFound, +}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize}; use weaver_common::vdir::VirtualDirectoryPath; @@ -213,15 +216,12 @@ impl RegistryManifest { }); } - nfes.extend( - manifest - .deserialization_warnings - .iter() - .map(|w| DeprecatedSyntaxInRegistryManifest { - path: manifest_path_buf.clone(), - error: w.clone(), - }), - ); + nfes.extend(manifest.deserialization_warnings.iter().map(|w| { + DeprecatedSyntaxInRegistryManifest { + path: manifest_path_buf.clone(), + error: w.clone(), + } + })); Ok(manifest) } From 99f03fba7a165753b1e31be61dfcfa9e99ed84cd Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Tue, 17 Feb 2026 20:23:36 -0800 Subject: [PATCH 25/27] tests --- crates/weaver_mcp/src/service.rs | 3 +- crates/weaver_semconv/src/manifest.rs | 42 +++++++++++++++++++ .../tests/test_data/registry_manifest.yaml | 2 + 3 files 
changed, 45 insertions(+), 2 deletions(-) create mode 100644 crates/weaver_semconv/tests/test_data/registry_manifest.yaml diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 346c81bcd..2982af24f 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,8 +58,7 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(®istry)); // Create versioned registry wrapper once for live check - let versioned_registry = - Arc::new(VersionedRegistry::V2(Box::new(registry.as_ref().clone()))); + let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new((*registry).clone()))); Self { search_context, diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 1e7ee1c24..53ce3838f 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -432,4 +432,46 @@ registry_path: "./registry" assert_eq!(original.schema_url, deserialized.schema_url); assert!(deserialized.registry_path.is_some()); } + + #[test] + fn test_legacy_manifest_file_warning() { + // Test that loading from a legacy manifest filename (registry_manifest.yaml) produces a warning + let mut warnings = vec![]; + let result = RegistryManifest::try_from_file( + "tests/test_data/registry_manifest.yaml", + &mut warnings, + ); + + assert!(result.is_ok()); + assert!( + warnings + .iter() + .any(|w| matches!(w, LegacyRegistryManifest { .. 
})), + "Expected a LegacyRegistryManifest warning, got: {warnings:?}" + ); + } + + #[test] + fn test_deprecated_properties_warning() { + // Test that using deprecated properties (semconv_version and schema_base_url) produces a warning + let mut warnings = vec![]; + let result = RegistryManifest::try_from_file( + "tests/test_data/valid_semconv_registry_manifest.yaml", + &mut warnings, + ); + + assert!(result.is_ok()); + let manifest = result.unwrap(); + // The manifest should still work and extract the correct values + assert_eq!(manifest.name(), "acme.com/schemas"); + assert_eq!(manifest.version(), "0.1.0"); + + // But it should produce a deprecation warning + assert!( + warnings + .iter() + .any(|w| matches!(w, DeprecatedSyntaxInRegistryManifest { .. })), + "Expected a DeprecatedSyntaxInRegistryManifest warning, got: {warnings:?}" + ); + } } diff --git a/crates/weaver_semconv/tests/test_data/registry_manifest.yaml b/crates/weaver_semconv/tests/test_data/registry_manifest.yaml new file mode 100644 index 000000000..9ccd8d297 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/registry_manifest.yaml @@ -0,0 +1,2 @@ +schema_url: "https://acme.com/schemas/1.0.0" +description: This is a legacy manifest file that uses the old filename. 
From 95e350ee49852019165dc49936bd8dd81a45d058 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Wed, 18 Feb 2026 06:31:42 -0800 Subject: [PATCH 26/27] box and doc --- crates/weaver_live_check/src/lib.rs | 2 +- crates/weaver_live_check/src/live_checker.rs | 16 ++++++++-------- crates/weaver_live_check/src/stats.rs | 2 +- crates/weaver_semconv/src/registry_repo.rs | 2 +- src/registry/live_check.rs | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/weaver_live_check/src/lib.rs b/crates/weaver_live_check/src/lib.rs index 33dd8f87e..d012503df 100644 --- a/crates/weaver_live_check/src/lib.rs +++ b/crates/weaver_live_check/src/lib.rs @@ -116,7 +116,7 @@ pub const DEFAULT_LIVE_CHECK_JQ: &str = include_str!("../../../defaults/jq/advic #[serde(untagged)] pub enum VersionedRegistry { /// v1 ResolvedRegistry - V1(ResolvedRegistry), + V1(Box), /// v2 ForgeResolvedRegistry V2(Box), } diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 1e7c872a9..075a8476b 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -613,7 +613,7 @@ mod tests { }, })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST".to_owned(), groups: vec![ResolvedGroup { id: "test.comprehensive.internal".to_owned(), @@ -752,7 +752,7 @@ mod tests { body: None, annotations: None, }], - }) + })) } } @@ -848,7 +848,7 @@ mod tests { }, })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST_METRICS".to_owned(), groups: vec![ // Attribute group for system memory @@ -983,7 +983,7 @@ mod tests { annotations: None, }, ], - }) + })) } } @@ -1045,7 +1045,7 @@ mod tests { }, })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST".to_owned(), groups: vec![ResolvedGroup 
{ id: "custom.comprehensive.internal".to_owned(), @@ -1090,7 +1090,7 @@ mod tests { body: None, annotations: None, }], - }) + })) } } @@ -1600,7 +1600,7 @@ mod tests { }, })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST_EVENTS".to_owned(), groups: vec![ ResolvedGroup { @@ -1711,7 +1711,7 @@ mod tests { }), }, ], - }) + })) } } diff --git a/crates/weaver_live_check/src/stats.rs b/crates/weaver_live_check/src/stats.rs index 70c0cab64..ce90bc635 100644 --- a/crates/weaver_live_check/src/stats.rs +++ b/crates/weaver_live_check/src/stats.rs @@ -351,7 +351,7 @@ mod tests { groups: vec![], registry_url: String::new(), }; - let versioned_registry = VersionedRegistry::V1(registry); + let versioned_registry = VersionedRegistry::V1(Box::new(registry)); let mut disabled_stats = LiveCheckStatistics::Disabled(DisabledStatistics); let mut normal_stats = diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 631d69f77..4f48e1300 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -147,7 +147,7 @@ impl RegistryRepo { } } - /// Returns the path to the `registry_manifest.yaml` file (if any). + /// Returns the path to the `manifest.yaml` or legacy `registry_manifest.yaml` (if any of them exist). #[must_use] pub fn manifest_path(&self) -> Option { // First check to see if we're pointing at a manifest. 
diff --git a/src/registry/live_check.rs b/src/registry/live_check.rs index 7c88b2e6b..4e62a51ff 100644 --- a/src/registry/live_check.rs +++ b/src/registry/live_check.rs @@ -242,7 +242,7 @@ pub(crate) fn command(args: &RegistryLiveCheckArgs) -> Result Date: Sat, 21 Feb 2026 10:58:29 -0800 Subject: [PATCH 27/27] feedback --- crates/weaver_resolver/src/lib.rs | 2 +- crates/weaver_resolver/src/loader.rs | 10 +- crates/weaver_semconv/src/registry.rs | 4 +- crates/weaver_semconv/src/registry_repo.rs | 102 +++++++++++---------- 4 files changed, 60 insertions(+), 58 deletions(-) diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index c09d27689..e83c98ba7 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -93,7 +93,7 @@ impl SchemaResolver { let schema_url = if let Some(m) = manifest.as_ref() { m.schema_url.clone() } else { - match SchemaUrl::try_from_name_version(&repo.name(), &repo.version()) { + match SchemaUrl::try_from_name_version(repo.name(), repo.version()) { Ok(url) => url, Err(_) => return WResult::FatalErr(Error::FailToResolveSchemaUrl {}), } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index d0fdb1435..e92ed9410 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -110,7 +110,7 @@ impl LoadedSemconvRegistry { LoadedSemconvRegistry::Unresolved { repo, dependencies, .. 
} => { - let mut result = vec![repo.name().to_string()]; + let mut result = vec![repo.name().to_owned()]; for d in dependencies { result.extend(d.registry_names()); } @@ -177,7 +177,7 @@ fn load_semconv_repository_recursive( registry_name: registry_repo.registry_path_repr().to_owned(), }); } - let registry_name = registry_repo.name().to_string(); + let registry_name = registry_repo.name().to_owned(); // Check for circular dependency if visited_registries.contains(®istry_name) { dependency_chain.push(registry_name.clone()); @@ -309,7 +309,7 @@ fn load_definition_repository( // TODO - less confusing way to load semconv specs. vec![SemConvRegistry::semconv_spec_from_file( - ®istry_repo.name(), + registry_repo.name(), entry.path(), &unversioned_validator, &versioned_validator, @@ -414,7 +414,7 @@ mod tests { dependencies, } = loaded { - assert_eq!("acme.com/schemas", repo.name().as_ref()); + assert_eq!("acme.com/schemas", repo.name()); assert_eq!(dependencies.len(), 1); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 1); @@ -425,7 +425,7 @@ mod tests { dependencies, }] = &dependencies.as_slice() { - assert_eq!("opentelemetry.io/schemas", repo.name().as_ref()); + assert_eq!("opentelemetry.io/schemas", repo.name()); assert_eq!(dependencies.len(), 0); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 0); diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index d20415971..5f3581a05 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -126,7 +126,7 @@ impl SemConvRegistry { LazyLock::new(|| Regex::new(r".*(v\d+\.\d+\.\d+).*").expect("Invalid regex")); // Load all the semantic convention registry. 
- let mut registry = SemConvRegistry::new(registry_repo.name().as_ref()); + let mut registry = SemConvRegistry::new(registry_repo.name()); for spec in semconv_specs { registry.add_semconv_spec(spec); @@ -145,7 +145,7 @@ impl SemConvRegistry { } let schema_url = - SchemaUrl::try_from_name_version(®istry_repo.name(), &semconv_version).map_err( + SchemaUrl::try_from_name_version(registry_repo.name(), &semconv_version).map_err( |e| Error::InvalidRegistryManifest { path: registry_repo.registry_path_repr().into(), error: e.clone(), diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 4f48e1300..df636e36f 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -4,7 +4,6 @@ use std::default::Default; use std::path::{Path, PathBuf}; -use std::sync::Arc; use crate::manifest::{Dependency, RegistryManifest}; use crate::schema_url::SchemaUrl; @@ -19,6 +18,39 @@ pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// The name of the registry manifest file. pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; +/// Finds the path to the manifest file, could be +/// - directly the path to the manifest file, or +/// - either `manifest.yaml` or `registry_manifest.yaml` in the given directory. +/// - None otherwise. +fn find_manifest_path(registry_path: &Path) -> Option { + // First check to see if we're pointing at a manifest. + if registry_path.is_file() { + // The path *is* the manifest. 
+ return Some(registry_path.to_path_buf()); + } + let manifest_path = registry_path.join(REGISTRY_MANIFEST); + let legacy_path = registry_path.join(LEGACY_REGISTRY_MANIFEST); + if manifest_path.exists() { + log_info(format!( + "Found registry manifest: {}", + manifest_path.display() + )); + Some(manifest_path) + } else if legacy_path.exists() { + log_info(format!( + "Found registry manifest: {}", + legacy_path.display() + )); + Some(legacy_path) + } else { + log_info(format!( + "No registry manifest found: {}", + manifest_path.display() + )); + None + } +} + /// A semantic convention registry repository that can be: /// - A definition repository, which is one of: /// - A simple wrapper around a local directory @@ -37,6 +69,9 @@ pub struct RegistryRepo { // The registry manifest definition. manifest: Option, + + // Cached path to the manifest file (if it exists). + manifest_path: Option, } impl RegistryRepo { @@ -67,20 +102,14 @@ impl RegistryRepo { let registry = VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; // Try to load manifest - if let Some(manifest_path) = { - // We need a temporary RegistryRepo to call manifest_path - let temp_repo = Self { - schema_url: SchemaUrl::new_unknown(), - registry: registry.clone(), - manifest: None, - }; - temp_repo.manifest_path() - } { - let registry_manifest = RegistryManifest::try_from_file(manifest_path, nfes)?; + let manifest_path = find_manifest_path(registry.path()); + if let Some(ref path) = manifest_path { + let registry_manifest = RegistryManifest::try_from_file(path, nfes)?; Ok(Self { schema_url: registry_manifest.schema_url.clone(), registry, manifest: Some(registry_manifest), + manifest_path, }) } else { // No manifest @@ -89,20 +118,21 @@ impl RegistryRepo { schema_url: schema_url_combined.clone(), registry, manifest: None, + manifest_path: None, }) } } /// Returns the registry name (from manifest if present, otherwise top-level field). 
#[must_use] - pub fn name(&self) -> Arc { - self.schema_url.name().into() + pub fn name(&self) -> &str { + self.schema_url.name() } /// Returns the registry version (from manifest if present, otherwise top-level field). #[must_use] - pub fn version(&self) -> Arc { - self.schema_url.version().into() + pub fn version(&self) -> &str { + self.schema_url.version() } /// Returns the local path to the semconv registry. @@ -131,7 +161,10 @@ impl RegistryRepo { match get_path_type(resolved_uri) { weaver_common::PathType::RelativePath => { // We need to understand if the manifest URI is the same as the registry URI. - let vdir_was_manifest_file = self.manifest_path()? == self.registry.path(); + let vdir_was_manifest_file = self + .manifest_path + .clone() + .is_some_and(|mp| mp == self.registry.path()); Some(self.registry.vdir_path().map_sub_folder(|path| { if vdir_was_manifest_file { match Path::new(&path).parent() { @@ -147,41 +180,9 @@ impl RegistryRepo { } } - /// Returns the path to the `manifest.yaml` or legacy `registry_manifest.yaml` (if any of them exist). - #[must_use] - pub fn manifest_path(&self) -> Option { - // First check to see if we're pointing at a manifest. - if self.registry.path().is_file() { - // The VirtualDirectory *is* the registry. - return Some(self.registry.path().to_path_buf()); - } - let manifest_path = self.registry.path().join(REGISTRY_MANIFEST); - let legacy_path = self.registry.path().join(LEGACY_REGISTRY_MANIFEST); - if manifest_path.exists() { - log_info(format!( - "Found registry manifest: {}", - manifest_path.display() - )); - Some(manifest_path) - } else if legacy_path.exists() { - log_info(format!( - "Found registry manifest: {}", - legacy_path.display() - )); - Some(legacy_path) - } else { - log_info(format!( - "No registry manifest found: {}", - manifest_path.display() - )); - None - } - } - /// Returns the registry schema URL. 
- #[must_use] - pub fn schema_url(&self) -> SchemaUrl { - self.schema_url.clone() + pub fn schema_url(&self) -> &SchemaUrl { + &self.schema_url } } @@ -191,6 +192,7 @@ impl Default for RegistryRepo { schema_url: SchemaUrl::new_unknown(), registry: VirtualDirectory::default(), manifest: None, + manifest_path: None, } } }