diff --git a/src/config.rs b/src/config.rs index 797c133..3f5a2e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,10 @@ use serde::{Deserialize, Serialize}; /// Configuration for mapping JSON data to a graph structure. /// +/// The node label is derived from the array key in the JSON (i.e., the last +/// segment of `node_path`). For example, a `node_path` of `"users"` produces +/// nodes with label `:users`, and `"data.Patent"` produces `:Patent`. +/// /// # Example /// /// ```rust @@ -10,21 +14,18 @@ use serde::{Deserialize, Serialize}; /// let config = GraphConfig { /// node_path: "users".to_string(), /// id_field: "id".to_string(), -/// label_field: Some("role".to_string()), /// relation_fields: vec!["friends".to_string()], /// }; /// ``` #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GraphConfig { - /// JSON path to the array of nodes (e.g., "data.users.*" or "users") + /// JSON path to the array of nodes (e.g., "data.users" or "Patent"). + /// The last segment is used as the node label. pub node_path: String, /// Field name for the node ID pub id_field: String, - /// Optional field name for the node label - pub label_field: Option, - /// Field names that contain arrays of related node IDs pub relation_fields: Vec, } @@ -34,13 +35,11 @@ impl GraphConfig { pub fn new( node_path: impl Into, id_field: impl Into, - label_field: Option, relation_fields: Vec, ) -> Self { Self { node_path: node_path.into(), id_field: id_field.into(), - label_field, relation_fields, } } @@ -50,10 +49,20 @@ impl GraphConfig { Self { node_path: node_path.into(), id_field: id_field.into(), - label_field: None, relation_fields: Vec::new(), } } + + /// Derive the node label from the last segment of `node_path`. + /// + /// For `"users"` → `"users"`, for `"data.Patent"` → `"Patent"`. + pub fn label(&self) -> String { + self.node_path + .rsplit('.') + .next() + .unwrap_or(&self.node_path) + .to_string() + } } impl Default for GraphConfig { @@ -61,7 +70,6 @@ impl Default for GraphConfig { Self { node_path: "nodes".to_string(), id_field: "id".to_string(), - label_field: None, relation_fields: Vec::new(), } } @@ -76,21 +84,14 @@ mod tests { let config = GraphConfig::default(); assert_eq!(config.node_path, "nodes"); assert_eq!(config.id_field, "id"); - assert!(config.label_field.is_none()); assert!(config.relation_fields.is_empty()); } #[test] fn test_new_config() { - let config = GraphConfig::new( - "users", - "id", - Some("role".to_string()), - vec!["friends".to_string()], - ); + let config = GraphConfig::new("users", "id", vec!["friends".to_string()]); assert_eq!(config.node_path, "users"); assert_eq!(config.id_field, "id"); - assert_eq!(config.label_field, Some("role".to_string())); assert_eq!(config.relation_fields, vec!["friends".to_string()]); } @@ -99,7 +100,15 @@ mod tests { let config = GraphConfig::minimal("users", "id"); assert_eq!(config.node_path, "users"); assert_eq!(config.id_field, "id"); - assert!(config.label_field.is_none()); assert!(config.relation_fields.is_empty()); } + + #[test] + fn test_label() { + let config = GraphConfig::minimal("users", "id"); + assert_eq!(config.label(), "users"); + + let config = GraphConfig::minimal("data.Patent", "id"); + assert_eq!(config.label(), "Patent"); + } } diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 5bd1dda..df00592 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -146,7 +146,6 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), relation_fields: vec![], }; diff --git a/src/engine/storage/json.rs b/src/engine/storage/json.rs index 68ea436..2ea1ed9 100644 --- a/src/engine/storage/json.rs +++ b/src/engine/storage/json.rs @@ -111,12 +111,7 @@ pub fn build_graph_from_json(json: &Value, config: &GraphConfig) -> StorageResul })? .to_string(); - let label = config.label_field.as_ref().and_then(|field| { - node_json - .get(field) - .and_then(|v| v.as_str()) - .map(String::from) - }); + let label = Some(config.label()); let node = Node::new(id.clone(), label, node_json.clone()); graph.add_node(node); @@ -283,7 +278,6 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), relation_fields: vec![], }; @@ -338,7 +332,6 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: None, relation_fields: vec!["friends".to_string()], }; diff --git a/src/engine/storage/mod.rs b/src/engine/storage/mod.rs index 5abdb45..70c4f69 100644 --- a/src/engine/storage/mod.rs +++ b/src/engine/storage/mod.rs @@ -32,7 +32,6 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), relation_fields: vec![], }; diff --git a/src/lib.rs b/src/lib.rs index 2f5af3e..f90cc7b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,6 @@ //! let config = GraphConfig { //! node_path: "users".to_string(), //! id_field: "id".to_string(), -//! label_field: Some("role".to_string()), //! relation_fields: vec!["friends".to_string()], //! }; //! @@ -31,9 +30,9 @@ //! let result = engine.execute("MATCH (u) RETURN COUNT(u)").unwrap(); //! assert_eq!(result.get_single_value().unwrap().as_i64(), Some(2)); //! -//! // Count admins -//! let result = engine.execute("MATCH (u:admin) RETURN COUNT(u)").unwrap(); -//! assert_eq!(result.get_single_value().unwrap().as_i64(), Some(1)); +//! // Match by label (derived from array key) +//! let result = engine.execute("MATCH (u:users) RETURN COUNT(u)").unwrap(); +//! assert_eq!(result.get_single_value().unwrap().as_i64(), Some(2)); //! //! // Sum ages //! let result = engine.execute("MATCH (u) RETURN SUM(u.age)").unwrap(); @@ -129,7 +128,6 @@ impl CypherEngine { /// let config = GraphConfig { /// node_path: "users".to_string(), /// id_field: "id".to_string(), - /// label_field: Some("role".to_string()), /// relation_fields: vec![], /// }; /// @@ -347,7 +345,7 @@ impl CypherEngine { let mut labels_by_label: std::collections::HashMap> = std::collections::HashMap::new(); for node in &self.graph.nodes { - let label = node.label.as_ref().unwrap_or(&"Node".to_string()).clone(); + let label = node.label.as_ref().unwrap().clone(); labels_by_label.entry(label).or_default().push(node); } @@ -402,16 +400,8 @@ impl CypherEngine { > = std::collections::HashMap::new(); for edge in &self.graph.edges { - let from_label = self.graph.nodes[edge.from] - .label - .as_ref() - .unwrap_or(&"Node".to_string()) - .clone(); - let to_label = self.graph.nodes[edge.to] - .label - .as_ref() - .unwrap_or(&"Node".to_string()) - .clone(); + let from_label = self.graph.nodes[edge.from].label.as_ref().unwrap().clone(); + let to_label = self.graph.nodes[edge.to].label.as_ref().unwrap().clone(); rel_types .entry(edge.rel_type.clone()) @@ -471,7 +461,7 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec![], }; @@ -481,13 +471,9 @@ mod tests { let result = engine.execute("MATCH (u) RETURN COUNT(u)").unwrap(); assert_eq!(result.get_single_value().unwrap().as_i64(), Some(3)); - // Count admins - let result = engine.execute("MATCH (u:admin) RETURN COUNT(u)").unwrap(); - assert_eq!(result.get_single_value().unwrap().as_i64(), Some(2)); - - // Count regular users - let result = engine.execute("MATCH (u:user) RETURN COUNT(u)").unwrap(); - assert_eq!(result.get_single_value().unwrap().as_i64(), Some(1)); + // Count by label (derived from array key) + let result = engine.execute("MATCH (u:users) RETURN COUNT(u)").unwrap(); + assert_eq!(result.get_single_value().unwrap().as_i64(), Some(3)); } #[test] @@ -503,7 +489,7 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec![], }; let engine = CypherEngine::from_json(&data, config).unwrap(); @@ -512,9 +498,9 @@ mod tests { let result = engine.execute("MATCH (u) RETURN SUM(u.age)").unwrap(); assert_eq!(result.get_single_value().unwrap().as_i64(), Some(90)); - // Sum admin ages - let result = engine.execute("MATCH (u:admin) RETURN SUM(u.age)").unwrap(); - assert_eq!(result.get_single_value().unwrap().as_i64(), Some(65)); + // Sum ages by label + let result = engine.execute("MATCH (u:users) RETURN SUM(u.age)").unwrap(); + assert_eq!(result.get_single_value().unwrap().as_i64(), Some(90)); } #[test] @@ -529,7 +515,7 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec![], }; @@ -563,7 +549,7 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: None, + relation_fields: vec!["friends".to_string()], }; @@ -594,7 +580,7 @@ mod tests { let config = GraphConfig { node_path: "data.users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec![], }; @@ -667,15 +653,15 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec![], }; let engine = CypherEngine::from_json(&data, config).unwrap(); - // AND - admin and active + // AND - admin and active (using WHERE on property) let result = engine - .execute("MATCH (u:admin) WHERE u.active = \"true\" RETURN COUNT(u)") + .execute("MATCH (u) WHERE u.role = \"admin\" AND u.active = \"true\" RETURN COUNT(u)") .unwrap(); assert_eq!(result.get_single_value().unwrap().as_i64(), Some(1)); @@ -721,9 +707,9 @@ mod tests { let result = engine.execute("MATCH (u) RETURN COUNT(u)").unwrap(); assert_eq!(result.get_single_value().unwrap().as_i64(), Some(2)); - // Label should be detected - let result = engine.execute("MATCH (u:admin) RETURN COUNT(u)").unwrap(); - assert_eq!(result.get_single_value().unwrap().as_i64(), Some(1)); + // Label should be derived from array key + let result = engine.execute("MATCH (u:users) RETURN COUNT(u)").unwrap(); + assert_eq!(result.get_single_value().unwrap().as_i64(), Some(2)); } #[test] @@ -743,7 +729,6 @@ mod tests { let primary = schema.primary_recommendation.as_ref().unwrap(); assert_eq!(primary.path, "users"); assert_eq!(primary.recommended_id_field, Some("id".to_string())); - assert_eq!(primary.recommended_label_field, Some("role".to_string())); assert!( primary .recommended_relation_fields @@ -767,7 +752,6 @@ mod tests { let primary = schema.primary_recommendation.as_ref().unwrap(); assert_eq!(primary.path, "data.network.users"); assert_eq!(primary.recommended_id_field, Some("id".to_string())); - assert_eq!(primary.recommended_label_field, Some("type".to_string())); } #[test] @@ -801,7 +785,7 @@ mod tests { let config = GraphConfig { node_path: "users".to_string(), id_field: "id".to_string(), - label_field: Some("role".to_string()), + relation_fields: vec!["friends".to_string()], }; @@ -811,8 +795,7 @@ mod tests { // Verify schema contains expected elements assert!(schema.contains("Graph Schema")); assert!(schema.contains("Node Types:")); - assert!(schema.contains("(:admin")); - assert!(schema.contains("(:user")); + assert!(schema.contains("(:users")); assert!(schema.contains("Relationship Types:")); assert!(schema.contains("friends")); } diff --git a/src/schema.rs b/src/schema.rs index e3fc3dd..4fee6a3 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -29,8 +29,6 @@ pub struct NodeFieldInfo { pub field_type: FieldType, /// Whether this field could be an ID field pub is_id_candidate: bool, - /// Whether this field could be a label field - pub is_label_candidate: bool, /// Whether this field could be a relation field (contains array of IDs) pub is_relation_candidate: bool, } @@ -72,8 +70,6 @@ pub struct ArraySchema { pub field_values: HashMap>, /// Recommended ID field for this array pub recommended_id_field: Option, - /// Recommended label field for this array - pub recommended_label_field: Option, /// Fields that likely contain relationships (arrays of IDs) pub recommended_relation_fields: Vec, } @@ -103,12 +99,10 @@ impl SchemaDetection { pub fn to_graph_config(&self) -> Option { let schema = &self.primary_recommendation.as_ref()?; let id_field = schema.recommended_id_field.clone()?; - let label_field = schema.recommended_label_field.clone(); Some(GraphConfig { node_path: schema.path.clone(), id_field, - label_field, relation_fields: schema.recommended_relation_fields.clone(), }) } @@ -124,62 +118,14 @@ impl SchemaDetection { output.push_str("Node Types:\n"); for schema in &self.array_schemas { - if let Some(label_field) = &schema.recommended_label_field { - // Show unique labels found - let labels: Vec = schema - .field_values - .get(label_field) - .map(|values| { - values - .iter() - .filter_map(|v| v.as_str()) - .map(|s| s.to_string()) - .collect::>() - }) - .unwrap_or_default() - .into_iter() - .collect::>() - .into_iter() - .collect::>(); - - for label in &labels { - output.push_str(&format!(" (:{} {} nodes)\n", label, schema.element_count)); - } - } else { - output.push_str(&format!( - " (:{} {} nodes)\n", - schema.path, schema.element_count - )); - } + let label = schema.path.rsplit('.').next().unwrap_or(&schema.path); + output.push_str(&format!(" (:{} {} nodes)\n", label, schema.element_count)); } output.push_str("\nProperties:\n"); for schema in &self.array_schemas { - let prefix = if let Some(label_field) = &schema.recommended_label_field { - // Find most common label - let most_common_label = schema - .field_values - .get(label_field) - .and_then(|values| { - values - .iter() - .filter_map(|v| v.as_str()) - .fold(HashMap::new(), |mut acc, label| { - *acc.entry(label).or_insert(0) += 1; - acc - }) - .into_iter() - .max_by_key(|(_, count)| *count) - .map(|(label, _)| label.to_string()) - }) - .unwrap_or_else(|| schema.path.clone()); - - format!(":{}", most_common_label) - } else { - format!(":{}", schema.path) - }; - - output.push_str(&format!("{} {{", prefix)); + let label = schema.path.rsplit('.').next().unwrap_or(&schema.path); + output.push_str(&format!(":{} {{", label)); let mut field_strings: Vec = schema .fields @@ -201,32 +147,9 @@ impl SchemaDetection { if has_relations { output.push_str("\nRelationship Types:\n"); for schema in &self.array_schemas { + let label = schema.path.rsplit('.').next().unwrap_or(&schema.path); for rel_field in &schema.recommended_relation_fields { - let prefix = if let Some(label_field) = &schema.recommended_label_field { - // Find most common label - let most_common_label = schema - .field_values - .get(label_field) - .and_then(|values| { - values - .iter() - .filter_map(|v| v.as_str()) - .fold(HashMap::new(), |mut acc, label| { - *acc.entry(label).or_insert(0) += 1; - acc - }) - .into_iter() - .max_by_key(|(_, count)| *count) - .map(|(label, _)| label.to_string()) - }) - .unwrap_or_else(|| schema.path.clone()); - - format!("(:{})-", most_common_label) - } else { - format!("(:{})-", schema.path) - }; - - output.push_str(&format!("{}[:{}]->()\n", prefix, rel_field)); + output.push_str(&format!("(:{})-[:{}]->()\n", label, rel_field)); } } } @@ -239,44 +162,16 @@ impl SchemaDetection { let mut patterns = Vec::new(); for schema in &self.array_schemas { - let pattern = if let Some(label_field) = &schema.recommended_label_field { - // Find most common label - let most_common_label = schema - .field_values - .get(label_field) - .and_then(|values| { - values - .iter() - .filter_map(|v| v.as_str()) - .fold(HashMap::new(), |mut acc, label| { - *acc.entry(label).or_insert(0) += 1; - acc - }) - .into_iter() - .max_by_key(|(_, count)| *count) - .map(|(label, _)| label.to_string()) - }) - .unwrap_or_else(|| schema.path.clone()); - - if !schema.recommended_relation_fields.is_empty() { - format!( - "(:{})-[{}]->(:{})", - most_common_label, - schema.recommended_relation_fields.join("|"), - schema.path - ) - } else { - format!("(:{})", most_common_label) - } - } else if !schema.recommended_relation_fields.is_empty() { + let label = schema.path.rsplit('.').next().unwrap_or(&schema.path); + let pattern = if !schema.recommended_relation_fields.is_empty() { format!( "(:{})-[{}]->(:{})", - schema.path, + label, schema.recommended_relation_fields.join("|"), - schema.path + label ) } else { - format!("(:{})", schema.path) + format!("(:{})", label) }; patterns.push(pattern); @@ -384,12 +279,6 @@ fn find_arrays(data: &Value, current_path: &str, results: &mut Vec) || field_name == "uuid" || field_name == "_id"; - // Check if this could be a label field - let is_label_candidate = matches!( - field_name.as_str(), - "type" | "role" | "kind" | "category" | "label" | "status" - ); - // Check if this could be a relation field (array of IDs) let is_relation_candidate = field_type == FieldType::Array && !is_id_candidate; @@ -397,7 +286,6 @@ fn find_arrays(data: &Value, current_path: &str, results: &mut Vec) name: field_name.clone(), field_type, is_id_candidate, - is_label_candidate, is_relation_candidate, }); } @@ -408,12 +296,6 @@ fn find_arrays(data: &Value, current_path: &str, results: &mut Vec) .find(|f| f.is_id_candidate) .map(|f| f.name.clone()); - // Find recommended label field - let recommended_label_field = fields - .iter() - .find(|f| f.is_label_candidate) - .map(|f| f.name.clone()); - // Find recommended relation fields let recommended_relation_fields: Vec = fields .iter() @@ -427,7 +309,6 @@ fn find_arrays(data: &Value, current_path: &str, results: &mut Vec) fields, field_values, recommended_id_field, - recommended_label_field, recommended_relation_fields, }); } @@ -493,10 +374,6 @@ mod tests { assert_eq!(users_schema.path, "users"); assert_eq!(users_schema.element_count, 2); assert_eq!(users_schema.recommended_id_field, Some("id".to_string())); - assert_eq!( - users_schema.recommended_label_field, - Some("role".to_string()) - ); } #[test] @@ -560,7 +437,6 @@ mod tests { let config = schema.to_graph_config().unwrap(); assert_eq!(config.node_path, "users"); assert_eq!(config.id_field, "id"); - assert_eq!(config.label_field, Some("role".to_string())); } #[test]