Commit 7f126a9

[MLOps3.0] Minor fixes (#201)
* Minor fix for LHM demo
* Minor fix for custom metrics
* Removed monitoring folders
* Create .gitignore in LHM demo folder: adds `.gitignore` to ignore local monitoring dashboard JSONs in the LHM demo
* Create .gitignore in MLOps-Advanced demo: adds `.gitignore` to ignore local monitoring dashboard JSONs
* Quick fixes and cleanup: removed batch scoring for synthetic data
* Major updates for Serverless, MLflow 3.0, and Lakebase:
  - Removed all mentions/use of AutoML and switched to Optuna with the native MLflow Spark integration
  - Added MLflow 3.0 deployment job
  - Updated Feature Engineering/Serving to use the new synced tables (Lakebase)
  - Minor fixes to monitoring and drift detection
* [MLOps3.0] Fixed quickstart and experiments:
  - Tested Quickstart demo
  - Fixed experiment name
  - Updated DAB configuration
* Updated references to new images/banners
* [MLOps3.0] Fixes for quick-start and Optuna HPO
* [MLOps3.0] Minor fixes for FE client
* [MLOps3.0] Bug fixes for Optuna/AutoML and FE client
* Syncing with upstream
* [MLOps3.0] Bug fixes for Optuna/AutoML and FE client
* [MLOps3.0] Minor fixes
* Syncing with upstream
1 parent 33bddb0 commit 7f126a9

File tree: 3 files changed (+58, -9 lines)


product_demos/Data-Science/mlops-end2end/02-mlops-advanced/02_model_training_hpo_optuna.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -498,7 +498,7 @@ def prune(self, study, trial):
 from mlflow import pyfunc
 
 
-def optuna_hpo_fn(n_trials: int, X_train: pd.DataFrame, Y_train: pd.Series, X_test: pd.DataFrame, Y_test: pd.Series, training_set_specs_in, preprocessor_in: ColumnTransformer, experiment_id: str, pos_label_in: str = pos_label, rng_seed_in: int = 2025, run_name: str = "spark-mlflow-tuning", optuna_sampler_in: optuna.samplers.TPESampler = optuna_sampler, optuna_pruner_in: optuna.pruners.BasePruner = None, n_jobs: int = 4) -> optuna.study.study.Study:
+def optuna_hpo_fn(n_trials: int, X_train: pd.DataFrame, Y_train: pd.Series, X_test: pd.DataFrame, Y_test: pd.Series, training_set_specs_in, preprocessor_in: ColumnTransformer, experiment_id: str, pos_label_in: str = pos_label, rng_seed_in: int = 2025, run_name: str = "spark-mlflow-tuning", optuna_sampler_in: optuna.samplers.TPESampler = optuna_sampler, optuna_pruner_in: optuna.pruners.BasePruner = None, n_jobs: int = 2) -> optuna.study.study.Study:
     """
     Increasing `n_jobs` may cause the experiment to fail, because failed trials return None and can't be pruned/caught in parallel mode
     """
@@ -604,7 +604,7 @@ def optuna_hpo_fn(n_trials: int, X_train: pd.DataFrame, Y_train: pd.Series, X_te
     run_name="mlops-hpo-best-run",  # "smoke-test"
     optuna_sampler_in=optuna_sampler,
     optuna_pruner_in=NoneValuePruner(),
-    n_jobs=2,  # Set this to the number of physical cores
+    # n_jobs=2,  # Increase this for more parallel trials
 )
 
 # COMMAND ----------
```
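The hunk above passes a custom `NoneValuePruner` to the tuning call. For context, a pruner that discards trials whose reported metric is missing might look like the minimal sketch below; this is an assumption about the intent of the demo's own class (which is defined elsewhere in the notebook), not a copy of it.

```python
import math

import optuna


class NoneValuePruner(optuna.pruners.BasePruner):
    """Hypothetical reimplementation: prune any trial whose latest
    reported intermediate value is missing or NaN, so a single failed
    trial does not take down the whole study in parallel mode."""

    def prune(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> bool:
        step = trial.last_step
        if step is None:
            return False  # nothing reported yet; let the trial continue
        value = trial.intermediate_values[step]
        return value is None or math.isnan(value)
```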

product_demos/Data-Science/mlops-end2end/02-mlops-advanced/03a_create_deployment_job.py

Lines changed: 52 additions & 7 deletions

```diff
@@ -31,6 +31,14 @@
 
 # COMMAND ----------
 
+# MAGIC %md
+# MAGIC Last environment tested:
+# MAGIC ```
+# MAGIC mlflow>=3.3.0
+# MAGIC ```
+
+# COMMAND ----------
+
 # MAGIC %pip install --quiet mlflow --upgrade
 # MAGIC
 # MAGIC
```
```diff
@@ -74,11 +82,9 @@
 # COMMAND ----------
 
 # Create job with necessary configuration to connect to model as deployment job
-from databricks.sdk import WorkspaceClient
 from databricks.sdk.service import jobs
 
 
-w = WorkspaceClient()
 job_settings = jobs.JobSettings(
     name=job_name,
     tasks=[
```
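The `tasks=[` list is truncated in this hunk. Purely as an illustration, a deployment-style job definition with a single notebook task might look like the sketch below; the task key, notebook path, and parameter names here are hypothetical, not the demo's actual values.

```python
from databricks.sdk.service import jobs

# Hypothetical example of a JobSettings payload with one notebook task;
# the demo's real task list is longer and is elided from this hunk.
job_settings = jobs.JobSettings(
    name=job_name,  # assumed to be defined earlier in the notebook
    tasks=[
        jobs.Task(
            task_key="Approval",  # illustrative task key
            notebook_task=jobs.NotebookTask(
                notebook_path="./04b_challenger_approval",  # illustrative path
                base_parameters={"model_name": "{{job.parameters.model_name}}"},
            ),
        ),
    ],
    parameters=[jobs.JobParameterDefinition(name="model_name", default="")],
    max_concurrent_runs=1,
)
```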
```diff
@@ -114,9 +120,39 @@
     max_concurrent_runs=1,
 )
 
-created_job = w.jobs.create(**job_settings.__dict__)
+# COMMAND ----------
+
+from databricks.sdk import WorkspaceClient
+
+
+w = WorkspaceClient()
+
+# Search for the job by name (in case it exists)
+existing_jobs = w.jobs.list(name=job_name)
+job_id = None
+for created_job in existing_jobs:
+    if created_job.settings.name == job_name and created_job.creator_user_name == current_user:
+        job_id = created_job.job_id
+        break
+
+if job_id:
+    # Update existing job
+    print("Updating existing...")
+    w.jobs.update(job_id=job_id, new_settings=job_settings)
+
+else:
+    # Create new job
+    print("Creating new...")
+    created_job = w.jobs.create(**job_settings.__dict__)
+    job_id = created_job.job_id
+
+print(f"Job ID: {job_id}")
+
+# COMMAND ----------
+
+# DBTITLE 1,ONE-TIME Operation
 print("Use the job name " + job_name + " to connect the deployment job to the UC model " + model_name + " as indicated in the UC Model UI.")
-print("\nFor your reference, the job ID is: " + str(created_job.job_id))
+print("\nFor your reference, the job ID is: " + str(job_id))
 print("\nDocumentation: \nAWS: https://docs.databricks.com/aws/mlflow/deployment-job#connect \nAzure: https://learn.microsoft.com/azure/databricks/mlflow/deployment-job#connect \nGCP: https://docs.databricks.com/gcp/mlflow/deployment-job#connect")
 
 # COMMAND ----------
```
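As a usage note, once `job_id` is resolved by the upsert logic above, the same SDK client can trigger a manual smoke-test run; a minimal sketch (optional, since a linked deployment job is normally launched automatically when a new model version is registered):

```python
# Optional smoke test: trigger the deployment job by hand and wait for
# it to finish. `w` and `job_id` come from the upsert cell above.
run = w.jobs.run_now(job_id=job_id).result()
print(f"Run {run.run_id} finished with state: {run.state.result_state}")
```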
```diff
@@ -135,10 +171,19 @@
 client = MlflowClient(registry_uri="databricks-uc")
 
 try:
-    if client.get_registered_model(model_name):
+    model_info = client.get_registered_model(model_name)
+    if model_info:
         # Model exists - Link job
-        client.update_registered_model(model_name, deployment_job_id=created_job.job_id)
+        if model_info.deployment_job_id == job_id:
+            print("Model exists with existing job - Pass")
+            pass
+
+        else:
+            print("Model exists - Updating job")
+            client.update_registered_model(model_name, deployment_job_id="")  # Unlink current job
+            client.update_registered_model(model_name, deployment_job_id=job_id)  # Link new one
 
 except mlflow.exceptions.RestException:
     # Create Empty Model placeholder and Link job
-    client.create_registered_model(model_name, deployment_job_id=created_job.job_id)
+    print("Model does not exist - Creating model and linking job")
+    client.create_registered_model(model_name, deployment_job_id=job_id)
```
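A quick sanity check after this cell could re-read the registered model and confirm the link took effect; a minimal sketch reusing the notebook's `client`, `model_name`, and `job_id` variables:

```python
# Minimal sketch: confirm the UC model now points at the deployment job.
# Compare as strings in case the SDK and MLflow return different types.
model_info = client.get_registered_model(model_name)
assert str(model_info.deployment_job_id) == str(job_id), "Deployment job not linked"
print(f"{model_name} is linked to deployment job {model_info.deployment_job_id}")
```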

product_demos/Data-Science/mlops-end2end/02-mlops-advanced/04b_challenger_approval.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -22,6 +22,10 @@
 
 # COMMAND ----------
 
+# MAGIC %run ../_resources/00-setup $adv_mlops=true
+
+# COMMAND ----------
+
 dbutils.widgets.text("model_name", f"{catalog}.{db}.advanced_mlops_churn", "Model Name")  # Will be populated from Deployment Jobs Parameters
 dbutils.widgets.text("model_version", "1", "Model Version")  # Will be populated from Deployment Jobs Parameters
 dbutils.widgets.text("approval_tag_name", "Approval_Check", "Approval Tag to check")
```
