Skip to content

Commit f9c009a

Browse files
[Data] - Preserve existing handlers for logging - don't use dictConfig (#57169)
<!-- Thank you for your contribution! Please review https://github.com/ray-project/ray/blob/master/CONTRIBUTING.rst before opening a pull request. --> <!-- Please add a reviewer to the assignee section when you create a PR. If you don't have the access to it, we will shortly find a reviewer and assign them to your PR. --> ## Why are these changes needed? So the way the logger works is log() -> buffer -> flush() -> target (file/console/remote_uri) On shutdown (triggered by logging.config.dictConfig), the MemoryHandler is closed which sets the output target to None When the target is set to None, the logs in the buffer have nowhere to be flushed, so they're retained in memory, hence it accumulates to no end. Solution: Temporarily detach the handlers for loggers (preserve attributes like `level`, `target`, `filter` and `formatters`) and restore them after calling `dictConfig()` Based off of https://github.com/ray-project/ray/pull/48958/files#diff-d5e32f872cf7b123f0899714728ac7fc0916a5609844a7d1bf7317d98094778c (Ray Core - Ray Data log fix) ## Related issue number <!-- For example: "Closes #1234" --> ## Checks - [x] I've signed off every commit(by using the -s flag, i.e., `git commit -s`) in this PR. - [x] I've run pre-commit jobs to lint the changes in this PR. ([pre-commit setup](https://docs.ray.io/en/latest/ray-contribute/getting-involved.html#lint-and-formatting)) - [ ] I've included any doc changes needed for https://docs.ray.io/en/master/. - [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file. - [ ] I've made sure the tests are passing. Note that there might be a few flaky tests, see the recent failures at https://flakey-tests.ray.io/ - Testing Strategy - [x] Unit tests - [ ] Release tests - [ ] This PR is not tested :( --------- Signed-off-by: Goutam V. <goutam@anyscale.com> Signed-off-by: Goutam <goutam@anyscale.com>
1 parent 386bf59 commit f9c009a

File tree

2 files changed

+150
-6
lines changed

2 files changed

+150
-6
lines changed

python/ray/data/_internal/logging.py

Lines changed: 109 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,20 +180,124 @@ def _get_logger_names() -> List[str]:
180180
def configure_logging() -> None:
181181
"""Configure the Python logger named 'ray.data'.
182182
183-
This function loads the configration YAML specified by "RAY_DATA_LOGGING_CONFIG"
183+
This function loads the configuration YAML specified by "RAY_DATA_LOGGING_CONFIG"
184184
environment variable. If the variable isn't set, this function loads the default
185185
"logging.yaml" file that is adjacent to this module.
186186
187187
If "RAY_DATA_LOG_ENCODING" is specified as "JSON" we will enable JSON logging mode
188188
if using the default logging config.
189189
"""
190+
config = _get_logging_config()
190191

191-
# Dynamically load env vars
192+
# Create formatters, filters, and handlers from config
193+
formatters = _create_formatters(config)
194+
filters = _create_filters(config)
195+
handlers = _create_handlers(config, formatters, filters)
196+
197+
# Configure each logger defined in the config
198+
_configure_loggers(config, handlers)
199+
200+
# Warn if both env vars are set (incompatible)
201+
_warn_if_incompatible_env_vars()
202+
203+
204+
def _import_class(class_path: str):
205+
"""Dynamically import a class from a fully qualified path."""
206+
import importlib
207+
208+
if "." not in class_path:
209+
raise ValueError(f"Invalid class path: {class_path}")
210+
211+
module_name, class_name = class_path.rsplit(".", 1)
212+
module = importlib.import_module(module_name)
213+
return getattr(module, class_name)
214+
215+
216+
def _create_formatters(config: dict) -> dict:
217+
"""Create formatter instances from config."""
218+
formatters = {}
219+
220+
for name, fmt_config in config.get("formatters", {}).items():
221+
if "class" in fmt_config:
222+
formatter_class = _import_class(fmt_config["class"])
223+
formatters[name] = formatter_class()
224+
elif "format" in fmt_config:
225+
formatters[name] = logging.Formatter(fmt_config["format"])
226+
227+
return formatters
228+
229+
230+
def _create_filters(config: dict) -> dict:
231+
"""Create filter instances from config."""
232+
filters = {}
233+
234+
for name, filter_config in config.get("filters", {}).items():
235+
# https://docs.python.org/3/library/logging.config.html#dictionary-schema-details
236+
if "()" in filter_config:
237+
filter_class = _import_class(filter_config["()"])
238+
filters[name] = filter_class()
239+
240+
return filters
241+
242+
243+
def _create_handlers(config: dict, formatters: dict, filters: dict) -> dict:
244+
"""Create and configure handler instances from config."""
245+
handlers = {}
246+
247+
# Keys that are not passed to handler constructor
248+
HANDLER_CONFIG_KEYS = {"class", "level", "formatter", "filters"}
249+
250+
for name, handler_config in config.get("handlers", {}).items():
251+
# Instantiate handler with all keys except config-only keys
252+
handler_class = _import_class(handler_config["class"])
253+
handler_kwargs = {
254+
k: v for k, v in handler_config.items() if k not in HANDLER_CONFIG_KEYS
255+
}
256+
handler = handler_class(**handler_kwargs)
257+
handler.name = name
258+
259+
# Configure handler
260+
if "level" in handler_config:
261+
handler.setLevel(handler_config["level"])
262+
263+
if "formatter" in handler_config:
264+
formatter = formatters.get(handler_config["formatter"])
265+
if formatter:
266+
handler.setFormatter(formatter)
267+
268+
for filter_name in handler_config.get("filters", []):
269+
filter_obj = filters.get(filter_name)
270+
if filter_obj:
271+
handler.addFilter(filter_obj)
272+
273+
handlers[name] = handler
274+
275+
return handlers
276+
277+
278+
def _configure_loggers(config: dict, handlers: dict) -> None:
279+
"""Configure logger instances from config."""
280+
for logger_name, logger_config in config.get("loggers", {}).items():
281+
logger = logging.getLogger(logger_name)
282+
logger.setLevel(logger_config.get("level", logging.NOTSET))
283+
284+
# Clear existing handlers
285+
for handler in logger.handlers[:]:
286+
logger.removeHandler(handler)
287+
288+
# Add configured handlers
289+
for handler_name in logger_config.get("handlers", []):
290+
handler = handlers.get(handler_name)
291+
if handler:
292+
logger.addHandler(handler)
293+
294+
logger.propagate = logger_config.get("propagate", True)
295+
296+
297+
def _warn_if_incompatible_env_vars() -> None:
298+
"""Warn if both RAY_DATA_LOGGING_CONFIG and RAY_DATA_LOG_ENCODING are set."""
192299
config_path = os.environ.get(RAY_DATA_LOGGING_CONFIG_ENV_VAR_NAME)
193300
log_encoding = os.environ.get(RAY_DATA_LOG_ENCODING_ENV_VAR_NAME)
194-
config = _get_logging_config()
195-
196-
logging.config.dictConfig(config)
197301

198302
# After configuring logger, warn if RAY_DATA_LOGGING_CONFIG is used with
199303
# RAY_DATA_LOG_ENCODING, because they are not both supported together.
@@ -330,5 +434,4 @@ def unregister_dataset_logger(dataset_id: str) -> Optional[int]:
330434
for logger in loggers:
331435
logger.removeHandler(log_handler)
332436
log_handler.close()
333-
334437
return _ACTIVE_DATASET

python/ray/data/tests/test_logging.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,47 @@ def test_json_logging_configuration(
167167
assert "turkey" not in console_log_output
168168

169169

170+
def test_configure_logging_preserves_existing_handlers(reset_logging, shutdown_only):
171+
"""Test that configure_logging() preserves existing handlers.
172+
173+
When configure_logging() is called, it should not remove existing handlers
174+
like MemoryHandler that were added to loggers before configuration.
175+
"""
176+
ray.init()
177+
178+
# Create a logger and add a MemoryHandler with a target before configuring Ray Data logging
179+
test_logger = logging.getLogger("ray.serve.test_preserve")
180+
target_handler = logging.StreamHandler()
181+
memory_handler = logging.handlers.MemoryHandler(capacity=100, target=target_handler)
182+
test_logger.addHandler(memory_handler)
183+
184+
try:
185+
# Verify the memory handler is there and target is set
186+
assert memory_handler in test_logger.handlers
187+
assert memory_handler.target is not None
188+
assert memory_handler.target is target_handler
189+
190+
# Configure Ray Data logging
191+
configure_logging()
192+
193+
# Verify the memory handler is still present after configuration
194+
assert memory_handler in test_logger.handlers
195+
196+
# Verify the target is still set (would be None if handler was closed/recreated)
197+
assert memory_handler.target is not None
198+
assert memory_handler.target is target_handler
199+
200+
# Verify the memory handler still works
201+
test_logger.info("test message")
202+
assert len(memory_handler.buffer) == 1
203+
assert "test message" in memory_handler.buffer[0].getMessage()
204+
finally:
205+
# Clean up handlers to avoid logging errors during teardown
206+
test_logger.removeHandler(memory_handler)
207+
memory_handler.close()
208+
target_handler.close()
209+
210+
170211
if __name__ == "__main__":
171212
import sys
172213

0 commit comments

Comments
 (0)