|
21 | 21 | from sagemaker.tensorflow import TensorFlow
|
22 | 22 | from sagemaker.pipeline import PipelineModel
|
23 | 23 | from sagemaker.model_monitor import DataCaptureConfig
|
| 24 | +from sagemaker.debugger import Rule, rule_configs, DebuggerHookConfig, CollectionConfig |
24 | 25 |
|
25 | 26 | from unittest.mock import MagicMock, patch
|
26 | 27 | from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointStep, EndpointConfigStep
|
@@ -58,6 +59,54 @@ def pca_estimator():
|
58 | 59 |
|
59 | 60 | return pca
|
60 | 61 |
|
@pytest.fixture
def pca_estimator_with_debug_hook():
    """Return a PCA-image Estimator configured with a debugger hook and one rule.

    The estimator is created with a ``DebuggerHookConfig`` (two collections,
    ``save_interval`` of "1") and a single built-in ``confusion`` rule.  Its
    ``sagemaker_session`` is swapped for a ``MagicMock`` after construction.
    """
    model_output_path = 's3://sagemaker/models'

    # Collections the debugger hook is asked to save.
    collections = [
        CollectionConfig("hyperparameters"),
        CollectionConfig("metrics"),
    ]
    debug_hook = DebuggerHookConfig(
        s3_output_path='s3://sagemaker/output/debug',
        hook_parameters={"save_interval": "1"},
        collection_configs=collections,
    )

    # Parameters handed to the built-in confusion rule.
    confusion_parameters = {
        "category_no": "15",
        "min_diag": "0.7",
        "max_off_diag": "0.3",
        "start_step": "17",
        "end_step": "19",
    }
    confusion_rule = Rule.sagemaker(
        rule_configs.confusion(),
        rule_parameters=confusion_parameters,
    )

    estimator = sagemaker.estimator.Estimator(
        PCA_IMAGE,
        role=EXECUTION_ROLE,
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        output_path=model_output_path,
        debugger_hook_config=debug_hook,
        rules=[confusion_rule],
    )

    hyperparameters = {
        'feature_dim': 50000,
        'num_components': 10,
        'subtract_mean': True,
        'algorithm_mode': 'randomized',
        'mini_batch_size': 200,
    }
    estimator.set_hyperparameters(**hyperparameters)

    # Replace the session only after construction, as the other fixtures do.
    mocked_session = MagicMock()
    mocked_session.boto_region_name = 'us-east-1'
    mocked_session._default_bucket = 'sagemaker'
    estimator.sagemaker_session = mocked_session

    return estimator
| 109 | + |
61 | 110 | @pytest.fixture
|
62 | 111 | def pca_model():
|
63 | 112 | model_data = 's3://sagemaker/models/pca.tar.gz'
|
@@ -95,6 +144,10 @@ def tensorflow_estimator():
|
95 | 144 | checkpoint_path='s3://sagemaker/models/sagemaker-tensorflow/checkpoints'
|
96 | 145 | )
|
97 | 146 |
|
| 147 | + estimator.debugger_hook_config = DebuggerHookConfig( |
| 148 | + s3_output_path='s3://sagemaker/models/debug' |
| 149 | + ) |
| 150 | + |
98 | 151 | estimator.sagemaker_session = MagicMock()
|
99 | 152 | estimator.sagemaker_session.boto_region_name = 'us-east-1'
|
100 | 153 | estimator.sagemaker_session._default_bucket = 'sagemaker'
|
@@ -148,6 +201,65 @@ def test_training_step_creation(pca_estimator):
|
148 | 201 | 'End': True
|
149 | 202 | }
|
150 | 203 |
|
@patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call)
def test_training_step_creation_with_debug_hook(pca_estimator_with_debug_hook):
    """A TrainingStep built from a debugger-enabled estimator emits the
    ``DebugHookConfig`` and ``DebugRuleConfigurations`` sections."""
    training_step = TrainingStep(
        'Training',
        estimator=pca_estimator_with_debug_hook,
        job_name='TrainingJob',
    )

    # Expected debugger sections, mirroring the fixture's hook and rule setup.
    expected_debug_hook = {
        'S3OutputPath': 's3://sagemaker/output/debug',
        'HookParameters': {'save_interval': '1'},
        'CollectionConfigurations': [
            {'CollectionName': 'hyperparameters'},
            {'CollectionName': 'metrics'},
        ],
    }
    expected_debug_rules = [
        {
            'RuleConfigurationName': 'Confusion',
            'RuleEvaluatorImage': '503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest',
            'RuleParameters': {
                'rule_to_invoke': 'Confusion',
                'category_no': '15',
                'min_diag': '0.7',
                'max_off_diag': '0.3',
                'start_step': '17',
                'end_step': '19',
            },
        },
    ]

    expected_parameters = {
        'AlgorithmSpecification': {
            'TrainingImage': PCA_IMAGE,
            'TrainingInputMode': 'File',
        },
        'OutputDataConfig': {
            'S3OutputPath': 's3://sagemaker/models',
        },
        'StoppingCondition': {
            'MaxRuntimeInSeconds': 86400,
        },
        'ResourceConfig': {
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
            'VolumeSizeInGB': 30,
        },
        'RoleArn': EXECUTION_ROLE,
        'HyperParameters': {
            'feature_dim': '50000',
            'num_components': '10',
            'subtract_mean': 'True',
            'algorithm_mode': 'randomized',
            'mini_batch_size': '200',
        },
        'DebugHookConfig': expected_debug_hook,
        'DebugRuleConfigurations': expected_debug_rules,
        'TrainingJobName': 'TrainingJob',
    }

    expected_definition = {
        'Type': 'Task',
        'Parameters': expected_parameters,
        'Resource': 'arn:aws:states:::sagemaker:createTrainingJob.sync',
        'End': True,
    }

    assert training_step.to_dict() == expected_definition
| 262 | + |
151 | 263 | @patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call)
|
152 | 264 | def test_training_step_creation_with_model(pca_estimator):
|
153 | 265 | training_step = TrainingStep('Training', estimator=pca_estimator, job_name='TrainingJob')
|
@@ -231,6 +343,9 @@ def test_training_step_creation_with_framework(tensorflow_estimator):
|
231 | 343 | 'OutputDataConfig': {
|
232 | 344 | 'S3OutputPath': 's3://sagemaker/models'
|
233 | 345 | },
|
| 346 | + 'DebugHookConfig': { |
| 347 | + 'S3OutputPath': 's3://sagemaker/models/debug' |
| 348 | + }, |
234 | 349 | 'StoppingCondition': {
|
235 | 350 | 'MaxRuntimeInSeconds': 86400
|
236 | 351 | },
|
|
0 commit comments