Fixes for errors when running the AzureML pipeline (#741)

tomconte · julia-meshcheryakova · web-flow · commit 290bd0ecdde6 · 2024-09-20T21:29:13.000Z
Background: I was trying to run the Azure ML pipeline following the docs and ran into a couple of errors that required fixes. - When using `parallel_run_function`, the job failed with the error "run() method should return a Pandas DataFrame or an array."; `run()` now returns a list containing the index name. - I was getting errors at [this line](https://github.com/microsoft/rag-experiment-accelerator/blob/721a50dde5c41c07587d8ab8562888be6c74cb37/rag_experiment_accelerator/utils/logging.py#L34) because the log level was an empty string; the cached level is now re-read from `LOGGING_LEVEL` (default `INFO`) whenever it is empty, not only when it is `None`. I am not sure what the root cause of the empty value is. --------- Co-authored-by: Julia Meshcheryakova <juliame@microsoft.com>
diff --git a/azureml/index.py b/azureml/index.py
@@ -2,7 +2,6 @@
 import os
 import sys
 import argparse
-from typing import List
 
 import mlflow
 
@@ -57,7 +56,7 @@ def init():
     mlflow_client = mlflow.MlflowClient(args.mlflow_tracking_uri)
 
 
-def run(input_paths: List[str]) -> str:
+def run(input_paths: list[str]) -> list[str]:
     global args
     global config
     global environment
@@ -66,4 +65,4 @@ def run(input_paths: List[str]) -> str:
 
     index_run(environment, config, index_config, input_paths, mlflow_client)
 
-    return args.index_name
+    return [args.index_name]
diff --git a/rag_experiment_accelerator/utils/logging.py b/rag_experiment_accelerator/utils/logging.py
@@ -20,7 +20,7 @@ def get_logger(name: str) -> logging.Logger:
         return logger
 
     global _cached_logging_level
-    if _cached_logging_level is None:
+    if not _cached_logging_level:
         _cached_logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
 
     handler = logging.StreamHandler(sys.stdout)