From 100383e016241f8163e4c89735501461aee2a201 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 19 Mar 2026 15:14:02 -0400 Subject: [PATCH 1/5] add filtered_variables shared class as a move toward consistency --- .../constants/permissibility.py | 1 + .../operations/base_operation.py | 12 -------- .../operations/expected_variables.py | 24 ++++++--------- .../operations/filtered_variables.py | 30 +++++++++++++++++++ .../get_dataset_filtered_variables.py | 21 ++----------- .../operations/operations_factory.py | 2 ++ .../operations/permissible_variables.py | 21 ++++--------- .../operations/required_variables.py | 20 ++++--------- .../utilities/sdtm_utilities.py | 15 ++++++++++ resources/schema/rule/Operations.json | 9 ++++++ resources/schema/rule/Operations.md | 17 +++++++++-- ...test_label_referenced_variable_metadata.py | 1 + .../test_name_referenced_variable_metadata.py | 1 + 13 files changed, 98 insertions(+), 76 deletions(-) create mode 100644 cdisc_rules_engine/operations/filtered_variables.py diff --git a/cdisc_rules_engine/constants/permissibility.py b/cdisc_rules_engine/constants/permissibility.py index 5ca672c5d..628bd1f01 100644 --- a/cdisc_rules_engine/constants/permissibility.py +++ b/cdisc_rules_engine/constants/permissibility.py @@ -2,3 +2,4 @@ EXPECTED = "Exp" PERMISSIBLE = "Perm" PERMISSIBILITY_KEY = "core" +PERMISSIBILITY_DEFAULT = PERMISSIBLE diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py index a045a323b..f397a9a3b 100644 --- a/cdisc_rules_engine/operations/base_operation.py +++ b/cdisc_rules_engine/operations/base_operation.py @@ -1,8 +1,4 @@ from cdisc_rules_engine.models.operation_params import OperationParams -from cdisc_rules_engine.constants.permissibility import ( - PERMISSIBLE, - PERMISSIBILITY_KEY, -) from abc import abstractmethod from typing import List import pandas as pd @@ -239,14 +235,6 @@ def _get_variables_metadata_from_standard(self) -> List[dict]: 
dataset_path=self.params.dataset_path, ) - def get_allowed_variable_permissibility(self, variable_metadata: dict): - """ - Returns the permissibility value of a variable allowed in the current domain - """ - if PERMISSIBILITY_KEY in variable_metadata: - return variable_metadata[PERMISSIBILITY_KEY] - return PERMISSIBLE - def _get_variable_names_list(self, domain, dataframe): # get variables metadata from the standard model variables_metadata: List[dict] = ( diff --git a/cdisc_rules_engine/operations/expected_variables.py b/cdisc_rules_engine/operations/expected_variables.py index e441236c2..06f98f1fa 100644 --- a/cdisc_rules_engine/operations/expected_variables.py +++ b/cdisc_rules_engine/operations/expected_variables.py @@ -1,9 +1,11 @@ -from cdisc_rules_engine.operations.base_operation import BaseOperation -from cdisc_rules_engine.constants.permissibility import EXPECTED -from typing import List +from cdisc_rules_engine.constants.permissibility import ( + EXPECTED, + PERMISSIBILITY_KEY, +) +from cdisc_rules_engine.operations.filtered_variables import FilteredVariables -class ExpectedVariables(BaseOperation): +class ExpectedVariables(FilteredVariables): def _execute_operation(self): """ Fetches required variables for a given domain from the CDISC library. @@ -17,14 +19,6 @@ def _execute_operation(self): The lists with column names are sorted in accordance to "ordinal" key of library metadata. 
""" - - # get variables metadata from the standard/model - variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - - return list( - { - var["name"].replace("--", self.params.domain): None - for var in variables_metadata - if self.get_allowed_variable_permissibility(var) == EXPECTED - }.keys() - ) + self.params.key_name = PERMISSIBILITY_KEY + self.params.key_value = EXPECTED + return super()._execute_operation() diff --git a/cdisc_rules_engine/operations/filtered_variables.py b/cdisc_rules_engine/operations/filtered_variables.py new file mode 100644 index 000000000..57e604a45 --- /dev/null +++ b/cdisc_rules_engine/operations/filtered_variables.py @@ -0,0 +1,30 @@ +from typing import List +from cdisc_rules_engine.operations.base_operation import BaseOperation + + +class FilteredVariables(BaseOperation): + def _execute_operation(self): + """ + Filter variables from the library based on specified criteria. + + Expected parameters: + - key_name: The metadata key to filter by (e.g., "role", "type", etc.) + - key_value: The value to match for the filter key (e.g., "Timing", "Identifier", etc.) 
+ """ + filter_key = self.params.key_name + filter_value = self.params.key_value + + # Get variables metadata from the standard model for the current domain + variables_metadata: List[dict] = self._get_variables_metadata_from_standard() + + # Filter variables based on the specified criteria + filtered_variables = [ + var for var in variables_metadata if var.get(filter_key) == filter_value + ] + + # Replace variable wildcards with actual domain names + variable_names_list = self._replace_variable_wildcards( + filtered_variables, self.params.domain + ) + + return variable_names_list diff --git a/cdisc_rules_engine/operations/get_dataset_filtered_variables.py b/cdisc_rules_engine/operations/get_dataset_filtered_variables.py index b54b211e9..967953482 100644 --- a/cdisc_rules_engine/operations/get_dataset_filtered_variables.py +++ b/cdisc_rules_engine/operations/get_dataset_filtered_variables.py @@ -1,8 +1,7 @@ -from typing import List -from cdisc_rules_engine.operations.base_operation import BaseOperation +from cdisc_rules_engine.operations.filtered_variables import FilteredVariables -class GetDatasetFilteredVariables(BaseOperation): +class GetDatasetFilteredVariables(FilteredVariables): def _execute_operation(self): """ Filter variables from the dataset based on specified criteria. @@ -11,21 +10,7 @@ def _execute_operation(self): - key_name: The metadata key to filter by (e.g., "role", "type", etc.) - key_value: The value to match for the filter key (e.g., "Timing", "Identifier", etc.) 
""" - filter_key = self.params.key_name - filter_value = self.params.key_value - - # Get variables metadata from the standard model for the current domain - variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - - # Filter variables based on the specified criteria - filtered_variables = [ - var for var in variables_metadata if var.get(filter_key) == filter_value - ] - - # Replace variable wildcards with actual domain names - variable_names_list = self._replace_variable_wildcards( - filtered_variables, self.params.domain - ) + variable_names_list = super()._execute_operation() # Get actual column names from the dataset that match our filtered list dataset_columns = self.params.dataframe.columns.tolist() diff --git a/cdisc_rules_engine/operations/operations_factory.py b/cdisc_rules_engine/operations/operations_factory.py index 1c01a086a..c272f6d57 100644 --- a/cdisc_rules_engine/operations/operations_factory.py +++ b/cdisc_rules_engine/operations/operations_factory.py @@ -10,6 +10,7 @@ ) from cdisc_rules_engine.operations.distinct import Distinct from cdisc_rules_engine.operations.extract_metadata import ExtractMetadata +from cdisc_rules_engine.operations.filtered_variables import FilteredVariables from cdisc_rules_engine.operations.get_xhtml_errors import GetXhtmlErrors from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder from cdisc_rules_engine.operations.library_model_column_order import ( @@ -99,6 +100,7 @@ class OperationsFactory(FactoryInterface): "distinct": Distinct, "dy": DayDataValidator, "extract_metadata": ExtractMetadata, + "filtered_variables": FilteredVariables, "get_column_order_from_dataset": DatasetColumnOrder, "get_column_order_from_library": LibraryColumnOrder, "get_codelist_attributes": CodeListAttributes, diff --git a/cdisc_rules_engine/operations/permissible_variables.py b/cdisc_rules_engine/operations/permissible_variables.py index 0dcbdf89b..bb81d3985 100644 --- 
a/cdisc_rules_engine/operations/permissible_variables.py +++ b/cdisc_rules_engine/operations/permissible_variables.py @@ -1,9 +1,8 @@ -from cdisc_rules_engine.operations.base_operation import BaseOperation -from cdisc_rules_engine.constants.permissibility import PERMISSIBLE -from typing import List +from cdisc_rules_engine.constants.permissibility import PERMISSIBILITY_KEY, PERMISSIBLE +from cdisc_rules_engine.operations.filtered_variables import FilteredVariables -class PermissibleVariables(BaseOperation): +class PermissibleVariables(FilteredVariables): def _execute_operation(self): """ Fetches required variables for a given domain from the CDISC library. @@ -17,14 +16,6 @@ def _execute_operation(self): The lists with column names are sorted in accordance to "ordinal" key of library metadata. """ - - # get variables metadata from the standard model - variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - - return list( - { - var["name"].replace("--", self.params.domain): None - for var in variables_metadata - if self.get_allowed_variable_permissibility(var) == PERMISSIBLE - }.keys() - ) + self.params.key_name = PERMISSIBILITY_KEY + self.params.key_value = PERMISSIBLE + return super()._execute_operation() diff --git a/cdisc_rules_engine/operations/required_variables.py b/cdisc_rules_engine/operations/required_variables.py index 167d748df..01c887df6 100644 --- a/cdisc_rules_engine/operations/required_variables.py +++ b/cdisc_rules_engine/operations/required_variables.py @@ -1,9 +1,8 @@ -from typing import List -from cdisc_rules_engine.operations.base_operation import BaseOperation -from cdisc_rules_engine.constants.permissibility import REQUIRED +from cdisc_rules_engine.constants.permissibility import PERMISSIBILITY_KEY, REQUIRED +from cdisc_rules_engine.operations.filtered_variables import FilteredVariables -class RequiredVariables(BaseOperation): +class RequiredVariables(FilteredVariables): def _execute_operation(self): """ Fetches 
required variables for a given domain from the CDISC library. @@ -17,13 +16,6 @@ def _execute_operation(self): The lists with column names are sorted in accordance to "ordinal" key of library metadata. """ - - # get variables metadata from the standard model - variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - return list( - { - var["name"].replace("--", self.params.domain): None - for var in variables_metadata - if self.get_allowed_variable_permissibility(var) == REQUIRED - }.keys() - ) + self.params.key_name = PERMISSIBILITY_KEY + self.params.key_value = REQUIRED + return super()._execute_operation() diff --git a/cdisc_rules_engine/utilities/sdtm_utilities.py b/cdisc_rules_engine/utilities/sdtm_utilities.py index 3d7304d8d..b951f5b5e 100644 --- a/cdisc_rules_engine/utilities/sdtm_utilities.py +++ b/cdisc_rules_engine/utilities/sdtm_utilities.py @@ -10,6 +10,10 @@ FINDINGS_ABOUT, FINDINGS_TEST_VARIABLE, ) +from cdisc_rules_engine.constants.permissibility import ( + PERMISSIBILITY_DEFAULT, + PERMISSIBILITY_KEY, +) from cdisc_rules_engine.enums.variable_roles import VariableRoles from cdisc_rules_engine.models.library_metadata_container import ( LibraryMetadataContainer, @@ -181,6 +185,7 @@ def get_variables_metadata_from_standard( # noqa ) else: variables_metadata = ig_variables + set_default_variable_permissibility(variables_metadata) return variables_metadata @@ -361,6 +366,7 @@ def get_variables_metadata_from_standard_model( # noqa timing_metadata, ]: replace_variable_wildcards(var_list, original_domain, variables_metadata) + set_default_variable_permissibility(variables_metadata) return variables_metadata else: # First, try to get class metadata and check for classVariables @@ -382,6 +388,7 @@ def get_variables_metadata_from_standard_model( # noqa replace_variable_wildcards( class_variables, original_domain, variables_metadata ) + set_default_variable_permissibility(variables_metadata) return variables_metadata else: # Second, check 
if domain exists in model datasets @@ -404,6 +411,7 @@ def get_variables_metadata_from_standard_model( # noqa dataset_variables, original_domain, variables_metadata ) variables_metadata.sort(key=lambda item: int(item["ordinal"])) + set_default_variable_permissibility(variables_metadata) return variables_metadata # Third, fall back to standard datasets if IG_domain_details: @@ -423,6 +431,7 @@ def get_variables_metadata_from_standard_model( # noqa replace_variable_wildcards( dataset_variables, original_domain, variables_metadata ) + set_default_variable_permissibility(variables_metadata) return variables_metadata return None @@ -445,6 +454,12 @@ def replace_variable_wildcards(var_list, domain, target_list): target_list.append(var_copy) +def set_default_variable_permissibility(var_list): + for variable_metadata in var_list: + if PERMISSIBILITY_KEY not in variable_metadata: + variable_metadata[PERMISSIBILITY_KEY] = PERMISSIBILITY_DEFAULT + + def get_all_model_wildcard_variables(model_details: dict): return { classVariable["name"] diff --git a/resources/schema/rule/Operations.json b/resources/schema/rule/Operations.json index 0a8879851..695452c38 100644 --- a/resources/schema/rule/Operations.json +++ b/resources/schema/rule/Operations.json @@ -102,6 +102,15 @@ "required": ["id", "operator"], "type": "object" }, + { + "properties": { + "operator": { + "const": "filtered_variables" + } + }, + "required": ["id", "operator", "key_name", "key_value"], + "type": "object" + }, { "properties": { "operator": { diff --git a/resources/schema/rule/Operations.md b/resources/schema/rule/Operations.md index 7615f980b..9d78eb59a 100644 --- a/resources/schema/rule/Operations.md +++ b/resources/schema/rule/Operations.md @@ -582,6 +582,17 @@ FA Operations for working with Implementation Guide and model variable metadata. +### filtered_variables + +Filters variables from the IG and model based on specified metadata criteria. 
+ +```yaml +- operator: get_dataset_filtered_variables + id: $expected_variables + key_name: "role" # role, core, etc + key_value: "Exp" # Timing, Req, Exp, Perm, etc +``` + ### expected_variables Returns the expected ("Core" = Exp ) variables for the domain in the current standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model. @@ -751,7 +762,7 @@ Output Filters variables from the dataset based on specified metadata criteria. Returns a list of variable names that exist in the dataset and match the filter criteria. ```yaml -- operation: get_dataset_filtered_variables +- operator: get_dataset_filtered_variables id: $timing_variables key_name: "role" key_value: "Timing" @@ -834,6 +845,7 @@ Output "Record Qualifier" ], "$qlabel_referenced_variable_metadata_ordinal": [44, null, 38], + "$qlabel_referenced_variable_metadata_core": ["Req", "Req", "Req"], "$qlabel_referenced_variable_metadata_label": ["Toxicity", null, "Analysis Method"] } ``` @@ -882,6 +894,7 @@ Output "Record Qualifier" ], "$qnam_referenced_variable_metadata_ordinal": [44, null, 38], + "$qnam_referenced_variable_metadata_core": ["Req", "Req", "Req"], "$qnam_referenced_variable_metadata_label": ["Toxicity", null, "Analysis Method"] } ``` @@ -1095,7 +1108,7 @@ Example: return the number of records grouped by USUBJID and timing variables, e Example: return the number of records where QNAM starts with "RACE" (matches RACE1, RACE2, RACE3, etc.) per USUBJID. 
```yaml -- operation: record_count +- operator: record_count id: $race_records_in_dataset filter: QNAM: "RACE&" diff --git a/tests/unit/test_operations/test_label_referenced_variable_metadata.py b/tests/unit/test_operations/test_label_referenced_variable_metadata.py index 62ec54006..0e78e5e9c 100644 --- a/tests/unit/test_operations/test_label_referenced_variable_metadata.py +++ b/tests/unit/test_operations/test_label_referenced_variable_metadata.py @@ -205,6 +205,7 @@ def mock_cached_method(*args, **kwargs): "$label_referenced_variable_name", "$label_referenced_variable_role", "$label_referenced_variable_ordinal", + "$label_referenced_variable_core", "$label_referenced_variable_label", ] diff --git a/tests/unit/test_operations/test_name_referenced_variable_metadata.py b/tests/unit/test_operations/test_name_referenced_variable_metadata.py index d9574b6e0..8674074b7 100644 --- a/tests/unit/test_operations/test_name_referenced_variable_metadata.py +++ b/tests/unit/test_operations/test_name_referenced_variable_metadata.py @@ -205,6 +205,7 @@ def mock_cached_method(*args, **kwargs): "$name_referenced_variable_name", "$name_referenced_variable_role", "$name_referenced_variable_ordinal", + "$name_referenced_variable_core", "$name_referenced_variable_label", ] From 9996c3384f48fc6706499e5cfd07cb9e11c94e8e Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 19 Mar 2026 19:23:00 +0000 Subject: [PATCH 2/5] Update merged schema files with markdown descriptions --- resources/schema/rule-merged/Operations.json | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/resources/schema/rule-merged/Operations.json b/resources/schema/rule-merged/Operations.json index 562d87898..53b51a754 100644 --- a/resources/schema/rule-merged/Operations.json +++ b/resources/schema/rule-merged/Operations.json @@ -113,6 +113,16 @@ "required": ["id", "operator"], "type": "object" }, + { + "properties": { + "operator": { + "const": "filtered_variables", + 
"markdownDescription": "\nFilters variables from the IG and model based on specified metadata criteria.\n\n```yaml\n- operator: get_dataset_filtered_variables\n id: $expected_variables\n key_name: \"role\" # role, core, etc\n key_value: \"Exp\" # Timing, Req, Exp, Perm, etc\n```\n" + } + }, + "required": ["id", "operator", "key_name", "key_value"], + "type": "object" + }, { "properties": { "operator": { @@ -187,7 +197,7 @@ "properties": { "operator": { "const": "get_dataset_filtered_variables", - "markdownDescription": "\nFilters variables from the dataset based on specified metadata criteria. Returns a list of variable names that exist in the dataset and match the filter criteria.\n\n```yaml\n- operation: get_dataset_filtered_variables\n id: $timing_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n" + "markdownDescription": "\nFilters variables from the dataset based on specified metadata criteria. Returns a list of variable names that exist in the dataset and match the filter criteria.\n\n```yaml\n- operator: get_dataset_filtered_variables\n id: $timing_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n" } }, "required": ["id", "operator", "key_name", "key_value"], @@ -197,7 +207,7 @@ "properties": { "operator": { "const": "label_referenced_variable_metadata", - "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable label found in the column provided in name. 
The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: label_referenced_variable_metadata\n id: $qlabel_referenced_variable_metadata\n name: \"QLABEL\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"],\n \"$qlabel_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qlabel_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qlabel_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qlabel_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" + "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable label found in the column provided in name. 
The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: label_referenced_variable_metadata\n id: $qlabel_referenced_variable_metadata\n name: \"QLABEL\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"],\n \"$qlabel_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qlabel_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qlabel_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qlabel_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qlabel_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, "required": ["id", "operator", "name"], @@ -277,7 +287,7 @@ "properties": { "operator": { "const": "name_referenced_variable_metadata", - "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable name found in the column provided in name. 
The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"Toxicity\", \"LBVISCOS\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: name_referenced_variable_metadata\n id: $qnam_referenced_variable_metadata\n name: \"QNAM\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"LBTOX\", \"LBVISCOS\", \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qnam_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qnam_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" + "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable name found in the column provided in name. 
The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"Toxicity\", \"LBVISCOS\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: name_referenced_variable_metadata\n id: $qnam_referenced_variable_metadata\n name: \"QNAM\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"LBTOX\", \"LBVISCOS\", \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qnam_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qnam_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qnam_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, "required": ["id", "operator", "name"], @@ -297,7 +307,7 @@ "properties": { "operator": { "const": "record_count", - "markdownDescription": "\nIf no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. Filter can have a wildcard `&` that when added to the end of the filter value will look for all instances of that prefix (see 4th example below). If group is provided, returns the number of rows matching each unique set of the grouping variables. 
These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.\n\nIf both filter and group are provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter that also match each unique set of the grouping variables.\n\n**Wildcard Filtering:** Filter values ending with % will match any records where the column value starts with the specified prefix. For example, RACE% will match RACE1, RACE2, RACE3, etc. This is useful for matching related variables with numeric or alphabetic suffixes.\n\n**Regex Transformation:** If regex is provided along with group, the regex pattern will be applied to transform grouping column values before grouping. The regex is only applied to columns where the pattern matches the data type. For example, using regex `^\\d{4}-\\d{2}-\\d{2}` on a column containing `2022-01-14T08:00` will extract `2022-01-14` for grouping purposes.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. 
If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored.\n\nExample: return the number of records in a dataset.\n\n```yaml\n- operator: record_count\n id: $records_in_dataset\n```\n\nExample: return the number of records where STUDYID = \"CDISC01\" and FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_cdisc01_records_in_dataset\n filter:\n STUDYID: \"CDISC01\"\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and timing variables, extracting only the date portion from datetime values.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid_date\n group:\n - USUBJID\n - --TESTCD\n - $TIMING_VARIABLES\n regex: \"^\\d{4}-\\d{2}-\\d{2}\"\n```\n\nExample: return the number of records where QNAM starts with \"RACE\" (matches RACE1, RACE2, RACE3, etc.) per USUBJID.\n\n```yaml\n- operation: record_count\n id: $race_records_in_dataset\n filter:\n QNAM: \"RACE&\"\n group:\n - \"USUBJID\"\n```\n\nExample: return the number of records grouped by USUBJID.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid\n group:\n - USUBJID\n```\n\nExample: return the number of records grouped by USUBJID where FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_records_per_usubjid\n group:\n - USUBJID\n filter:\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and IDVARVAL where QNAM = \"TEST1\" and IDVAR = \"GROUPID\", renaming the IDVARVAL column to GROUPID for subsequent merging.\n\n```yaml\n- operator: record_count\n id: $test1_records_per_usubjid_groupid\n group:\n - USUBJID\n - IDVARVAL\n filter:\n QNAM: \"TEST1\"\n IDVAR: \"GROUPID\"\n group_aliases:\n - USUBJID\n - GROUPID\n```\n\nExample: Group the StudyIdentifier dataset by parent_id and merge the result back to the context dataset StudyVersion using StudyVersion.id == StudyIdentifier.parent_id\n\n```yaml\nScope:\n Entities:\n Include:\n - 
StudyVersion\nOperations:\n - domain: StudyIdentifier\n filter:\n parent_entity: \"StudyVersion\"\n parent_rel: \"studyIdentifiers\"\n rel_type: \"definition\"\n studyIdentifierScope.organizationType.code: \"C70793\"\n studyIdentifierScope.organizationType.codeSystem: \"http://www.cdisc.org\"\n group:\n - parent_id\n group_aliases:\n - id\n id: $num_sponsor_ids\n operator: record_count\n```\n" + "markdownDescription": "\nIf no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. Filter can have a wildcard `&` that when added to the end of the filter value will look for all instances of that prefix (see 4th example below). If group is provided, returns the number of rows matching each unique set of the grouping variables. These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.\n\nIf both filter and group are provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter that also match each unique set of the grouping variables.\n\n**Wildcard Filtering:** Filter values ending with % will match any records where the column value starts with the specified prefix. For example, RACE% will match RACE1, RACE2, RACE3, etc. This is useful for matching related variables with numeric or alphabetic suffixes.\n\n**Regex Transformation:** If regex is provided along with group, the regex pattern will be applied to transform grouping column values before grouping. The regex is only applied to columns where the pattern matches the data type. 
For example, using regex `^\\d{4}-\\d{2}-\\d{2}` on a column containing `2022-01-14T08:00` will extract `2022-01-14` for grouping purposes.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored.\n\nExample: return the number of records in a dataset.\n\n```yaml\n- operator: record_count\n id: $records_in_dataset\n```\n\nExample: return the number of records where STUDYID = \"CDISC01\" and FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_cdisc01_records_in_dataset\n filter:\n STUDYID: \"CDISC01\"\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and timing variables, extracting only the date portion from datetime values.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid_date\n group:\n - USUBJID\n - --TESTCD\n - $TIMING_VARIABLES\n regex: \"^\\d{4}-\\d{2}-\\d{2}\"\n```\n\nExample: return the number of records where QNAM starts with \"RACE\" (matches RACE1, RACE2, RACE3, etc.) 
per USUBJID.\n\n```yaml\n- operator: record_count\n id: $race_records_in_dataset\n filter:\n QNAM: \"RACE&\"\n group:\n - \"USUBJID\"\n```\n\nExample: return the number of records grouped by USUBJID.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid\n group:\n - USUBJID\n```\n\nExample: return the number of records grouped by USUBJID where FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_records_per_usubjid\n group:\n - USUBJID\n filter:\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and IDVARVAL where QNAM = \"TEST1\" and IDVAR = \"GROUPID\", renaming the IDVARVAL column to GROUPID for subsequent merging.\n\n```yaml\n- operator: record_count\n id: $test1_records_per_usubjid_groupid\n group:\n - USUBJID\n - IDVARVAL\n filter:\n QNAM: \"TEST1\"\n IDVAR: \"GROUPID\"\n group_aliases:\n - USUBJID\n - GROUPID\n```\n\nExample: Group the StudyIdentifier dataset by parent_id and merge the result back to the context dataset StudyVersion using StudyVersion.id == StudyIdentifier.parent_id\n\n```yaml\nScope:\n Entities:\n Include:\n - StudyVersion\nOperations:\n - domain: StudyIdentifier\n filter:\n parent_entity: \"StudyVersion\"\n parent_rel: \"studyIdentifiers\"\n rel_type: \"definition\"\n studyIdentifierScope.organizationType.code: \"C70793\"\n studyIdentifierScope.organizationType.codeSystem: \"http://www.cdisc.org\"\n group:\n - parent_id\n group_aliases:\n - id\n id: $num_sponsor_ids\n operator: record_count\n```\n" } }, "required": ["id", "operator"], From 6b69aeea514e3a054b0d207192c7028495effd2a Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 19 Mar 2026 15:50:52 -0400 Subject: [PATCH 3/5] use library_column_order instead --- .../operations/expected_variables.py | 4 +-- .../operations/filtered_variables.py | 30 ------------------- .../get_dataset_filtered_variables.py | 4 +-- .../operations/library_column_order.py | 30 ++++++++++++++----- .../operations/operations_factory.py | 2 -- 
.../operations/permissible_variables.py | 4 +-- .../operations/required_variables.py | 4 +-- resources/schema/rule-merged/Operations.json | 12 +------- resources/schema/rule/Operations.json | 9 ------ resources/schema/rule/Operations.md | 14 ++------- 10 files changed, 34 insertions(+), 79 deletions(-) delete mode 100644 cdisc_rules_engine/operations/filtered_variables.py diff --git a/cdisc_rules_engine/operations/expected_variables.py b/cdisc_rules_engine/operations/expected_variables.py index 06f98f1fa..897bf4b97 100644 --- a/cdisc_rules_engine/operations/expected_variables.py +++ b/cdisc_rules_engine/operations/expected_variables.py @@ -2,10 +2,10 @@ EXPECTED, PERMISSIBILITY_KEY, ) -from cdisc_rules_engine.operations.filtered_variables import FilteredVariables +from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder -class ExpectedVariables(FilteredVariables): +class ExpectedVariables(LibraryColumnOrder): def _execute_operation(self): """ Fetches required variables for a given domain from the CDISC library. diff --git a/cdisc_rules_engine/operations/filtered_variables.py b/cdisc_rules_engine/operations/filtered_variables.py deleted file mode 100644 index 57e604a45..000000000 --- a/cdisc_rules_engine/operations/filtered_variables.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import List -from cdisc_rules_engine.operations.base_operation import BaseOperation - - -class FilteredVariables(BaseOperation): - def _execute_operation(self): - """ - Filter variables from the library based on specified criteria. - - Expected parameters: - - key_name: The metadata key to filter by (e.g., "role", "type", etc.) - - key_value: The value to match for the filter key (e.g., "Timing", "Identifier", etc.) 
- """ - filter_key = self.params.key_name - filter_value = self.params.key_value - - # Get variables metadata from the standard model for the current domain - variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - - # Filter variables based on the specified criteria - filtered_variables = [ - var for var in variables_metadata if var.get(filter_key) == filter_value - ] - - # Replace variable wildcards with actual domain names - variable_names_list = self._replace_variable_wildcards( - filtered_variables, self.params.domain - ) - - return variable_names_list diff --git a/cdisc_rules_engine/operations/get_dataset_filtered_variables.py b/cdisc_rules_engine/operations/get_dataset_filtered_variables.py index 967953482..88c8109e5 100644 --- a/cdisc_rules_engine/operations/get_dataset_filtered_variables.py +++ b/cdisc_rules_engine/operations/get_dataset_filtered_variables.py @@ -1,7 +1,7 @@ -from cdisc_rules_engine.operations.filtered_variables import FilteredVariables +from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder -class GetDatasetFilteredVariables(FilteredVariables): +class GetDatasetFilteredVariables(LibraryColumnOrder): def _execute_operation(self): """ Filter variables from the dataset based on specified criteria. diff --git a/cdisc_rules_engine/operations/library_column_order.py b/cdisc_rules_engine/operations/library_column_order.py index 2aec11ede..3168119b0 100644 --- a/cdisc_rules_engine/operations/library_column_order.py +++ b/cdisc_rules_engine/operations/library_column_order.py @@ -1,6 +1,5 @@ from cdisc_rules_engine.operations.base_operation import BaseOperation from typing import List -from collections import OrderedDict class LibraryColumnOrder(BaseOperation): @@ -16,13 +15,28 @@ def _execute_operation(self): Length of Series is equal to the length of given dataframe. The lists with column names are sorted in accordance to "ordinal" key of library metadata. 
- """ - # get variables metadata , for custom domains from model; for non-custom from IG and model + If key_name and key_value are provided, filter variables based on specified criteria. + + Optional parameters: + - key_name: The metadata key to filter by (e.g., "role", "type", etc.) + - key_value: The value to match for the filter key (e.g., "Timing", "Identifier", etc.) + """ + # Get variables metadata from the standard model for the current domain variables_metadata: List[dict] = self._get_variables_metadata_from_standard() - # create a list of variable names in accordance to the "ordinal" key - variable_names_list = [ - var["name"].replace("--", self.params.domain) for var in variables_metadata - ] - return list(OrderedDict.fromkeys(variable_names_list)) + # Filter variables based on the specified criteria + + if self.params.key_name and self.params.key_value: + variables_metadata = [ + var + for var in variables_metadata + if var.get(self.params.key_name) == self.params.key_value + ] + + # Replace variable wildcards with actual domain names + variable_names_list = self._replace_variable_wildcards( + variables_metadata, self.params.domain + ) + + return variable_names_list diff --git a/cdisc_rules_engine/operations/operations_factory.py b/cdisc_rules_engine/operations/operations_factory.py index c272f6d57..1c01a086a 100644 --- a/cdisc_rules_engine/operations/operations_factory.py +++ b/cdisc_rules_engine/operations/operations_factory.py @@ -10,7 +10,6 @@ ) from cdisc_rules_engine.operations.distinct import Distinct from cdisc_rules_engine.operations.extract_metadata import ExtractMetadata -from cdisc_rules_engine.operations.filtered_variables import FilteredVariables from cdisc_rules_engine.operations.get_xhtml_errors import GetXhtmlErrors from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder from cdisc_rules_engine.operations.library_model_column_order import ( @@ -100,7 +99,6 @@ class OperationsFactory(FactoryInterface): 
"distinct": Distinct, "dy": DayDataValidator, "extract_metadata": ExtractMetadata, - "filtered_variables": FilteredVariables, "get_column_order_from_dataset": DatasetColumnOrder, "get_column_order_from_library": LibraryColumnOrder, "get_codelist_attributes": CodeListAttributes, diff --git a/cdisc_rules_engine/operations/permissible_variables.py b/cdisc_rules_engine/operations/permissible_variables.py index bb81d3985..3477eba51 100644 --- a/cdisc_rules_engine/operations/permissible_variables.py +++ b/cdisc_rules_engine/operations/permissible_variables.py @@ -1,8 +1,8 @@ from cdisc_rules_engine.constants.permissibility import PERMISSIBILITY_KEY, PERMISSIBLE -from cdisc_rules_engine.operations.filtered_variables import FilteredVariables +from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder -class PermissibleVariables(FilteredVariables): +class PermissibleVariables(LibraryColumnOrder): def _execute_operation(self): """ Fetches required variables for a given domain from the CDISC library. diff --git a/cdisc_rules_engine/operations/required_variables.py b/cdisc_rules_engine/operations/required_variables.py index 01c887df6..efe72f370 100644 --- a/cdisc_rules_engine/operations/required_variables.py +++ b/cdisc_rules_engine/operations/required_variables.py @@ -1,8 +1,8 @@ from cdisc_rules_engine.constants.permissibility import PERMISSIBILITY_KEY, REQUIRED -from cdisc_rules_engine.operations.filtered_variables import FilteredVariables +from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder -class RequiredVariables(FilteredVariables): +class RequiredVariables(LibraryColumnOrder): def _execute_operation(self): """ Fetches required variables for a given domain from the CDISC library. 
diff --git a/resources/schema/rule-merged/Operations.json b/resources/schema/rule-merged/Operations.json index 53b51a754..a93003e72 100644 --- a/resources/schema/rule-merged/Operations.json +++ b/resources/schema/rule-merged/Operations.json @@ -113,16 +113,6 @@ "required": ["id", "operator"], "type": "object" }, - { - "properties": { - "operator": { - "const": "filtered_variables", - "markdownDescription": "\nFilters variables from the IG and model based on specified metadata criteria.\n\n```yaml\n- operator: get_dataset_filtered_variables\n id: $expected_variables\n key_name: \"role\" # role, core, etc\n key_value: \"Exp\" # Timing, Req, Exp, Perm, etc\n```\n" - } - }, - "required": ["id", "operator", "key_name", "key_value"], - "type": "object" - }, { "properties": { "operator": { @@ -147,7 +137,7 @@ "properties": { "operator": { "const": "get_column_order_from_library", - "markdownDescription": "\nFetches column order for a given domain from the CDISC library. The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $ig_variables\nOperations:\n - id: $ig_variables\n operator: get_column_order_from_library\n```\n" + "markdownDescription": "\nFetches column order for a given domain from the CDISC library. 
The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\nOptionally filters variables based on specified metadata criteria.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $ig_variables\nOperations:\n - id: $ig_variables\n operator: get_column_order_from_library\n key_name: \"core\" # role, core, etc\n key_value: \"Exp\" # Timing, Req, Exp, Perm, etc\n```\n" } }, "required": ["id", "operator"], diff --git a/resources/schema/rule/Operations.json b/resources/schema/rule/Operations.json index 695452c38..0a8879851 100644 --- a/resources/schema/rule/Operations.json +++ b/resources/schema/rule/Operations.json @@ -102,15 +102,6 @@ "required": ["id", "operator"], "type": "object" }, - { - "properties": { - "operator": { - "const": "filtered_variables" - } - }, - "required": ["id", "operator", "key_name", "key_value"], - "type": "object" - }, { "properties": { "operator": { diff --git a/resources/schema/rule/Operations.md b/resources/schema/rule/Operations.md index 9d78eb59a..0d9b24bac 100644 --- a/resources/schema/rule/Operations.md +++ b/resources/schema/rule/Operations.md @@ -582,17 +582,6 @@ FA Operations for working with Implementation Guide and model variable metadata. -### filtered_variables - -Filters variables from the IG and model based on specified metadata criteria. - -```yaml -- operator: get_dataset_filtered_variables - id: $expected_variables - key_name: "role" # role, core, etc - key_value: "Exp" # Timing, Req, Exp, Perm, etc -``` - ### expected_variables Returns the expected ("Core" = Exp ) variables for the domain in the current standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model. @@ -685,6 +674,7 @@ Output: ### get_column_order_from_library Fetches column order for a given domain from the CDISC library. 
The lists with column names are sorted in accordance to "ordinal" key of library metadata. +Optionally filters variables based on specified metadata criteria. Rule Type: Variable Metadata Check @@ -697,6 +687,8 @@ Check: Operations: - id: $ig_variables operator: get_column_order_from_library + key_name: "core" # role, core, etc + key_value: "Exp" # Timing, Req, Exp, Perm, etc ``` ### get_model_column_order From b6a2c6034224f17ba048e856d5b9926d6cc0f467 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Sat, 21 Mar 2026 10:34:53 -0400 Subject: [PATCH 4/5] fix params check in library_column_order --- cdisc_rules_engine/operations/library_column_order.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/operations/library_column_order.py b/cdisc_rules_engine/operations/library_column_order.py index 3168119b0..ae4133e0b 100644 --- a/cdisc_rules_engine/operations/library_column_order.py +++ b/cdisc_rules_engine/operations/library_column_order.py @@ -27,7 +27,7 @@ def _execute_operation(self): # Filter variables based on the specified criteria - if self.params.key_name and self.params.key_value: + if self.params.key_name is not None and self.params.key_value is not None: variables_metadata = [ var for var in variables_metadata From 846023e53fe1393dc9df19dc537026e13395d899 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Sat, 21 Mar 2026 10:51:19 -0400 Subject: [PATCH 5/5] only check key_name --- cdisc_rules_engine/operations/library_column_order.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/operations/library_column_order.py b/cdisc_rules_engine/operations/library_column_order.py index ae4133e0b..cd49138ca 100644 --- a/cdisc_rules_engine/operations/library_column_order.py +++ b/cdisc_rules_engine/operations/library_column_order.py @@ -27,7 +27,7 @@ def _execute_operation(self): # Filter variables based on the specified criteria - if self.params.key_name is not None and self.params.key_value is
not None: + if self.params.key_name: variables_metadata = [ var for var in variables_metadata