Commit 12a9712

aysim319, melange396, and minhkhul authored
Fix unstructured logging variable formatting (#2029)
Co-authored-by: george <george.haff@gmail.com>
Co-authored-by: minhkhul <118945681+minhkhul@users.noreply.github.com>
1 parent 5c72cbd commit 12a9712

29 files changed: +163 -132 lines changed


_delphi_utils_python/README.md

Lines changed: 10 additions & 1 deletion
@@ -22,13 +22,22 @@ Source code can be found here:
 
 ## Logger Usage
 
+To make our structured logging as useful as it can be, particularly within the context of how we use logs in Elastic, the `event` argument (typically the first unnamed arg) should be a static string (to make filtering easier), and each dynamic/varying value should be specified in an individual meaningfully- and consistently-named argument to the logger call (for use in filtering, thresholding, grouping, visualization, etc).
+
+### Commonly used argument names:
+- data_source
+- geo_type
+- signal
+- issue_date
+- filename
+
 Single-thread usage.
 
 ```py
 from delphi_utils.logger import get_structured_logger
 
 logger = get_structured_logger('my_logger')
-logger.info('Hello, world!')
+logger.info('Hello', name='World')
 ```
 
 Multi-thread usage.
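
Put together, the README's new guidance amounts to the pattern below — a minimal sketch (not part of this commit) using the commonly used argument names; the specific values are hypothetical:

```py
# Minimal sketch of the convention: a static `event` string, with each
# dynamic value passed as its own named argument (values are hypothetical).
from delphi_utils.logger import get_structured_logger

logger = get_structured_logger("my_logger")
logger.info(
    "Exported CSV",                    # static event string, easy to filter on
    data_source="chng",                # commonly used argument names from above
    geo_type="state",
    signal="smoothed_outpatient_cli",
    filename="20200315_state_smoothed_outpatient_cli.csv",
)
```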

_delphi_utils_python/delphi_utils/export.py

Lines changed: 4 additions & 2 deletions
@@ -22,8 +22,10 @@ def filter_contradicting_missing_codes(df, sensor, metric, date, logger=None):
     for mask in masks:
         if not logger is None and df.loc[mask].size > 0:
             logger.info(
-                "Filtering contradictory missing code in " +
-                "{0}_{1}_{2}.".format(sensor, metric, date.strftime(format="%Y-%m-%d"))
+                "Filtering contradictory missing code",
+                sensor=sensor,
+                metric=metric,
+                date=date.strftime(format="%Y-%m-%d"),
             )
             df = df.loc[~mask]
         elif logger is None and df.loc[mask].size > 0:
_delphi_utils_python/delphi_utils/flash_eval/eval_day.py

Lines changed: 1 addition & 2 deletions
@@ -153,8 +153,7 @@ def output(evd_ranking, day, lag, signal, logger):
             p_text += f"\t{start_link}|*{index}*, {'{:.2f}'.format(value)}>\n"
         else:
             break
-    name = f"Signal: {signal} Lag: {lag}"
-    logger.info(name, payload=p_text)
+    logger.info("FLaSH: worth inspecting", signal=signal, lag=lag, payload=p_text)
 
 
 def evd_ranking_fn(ts_streams, EVD_max, EVD_min):

_delphi_utils_python/delphi_utils/logger.py

Lines changed: 5 additions & 5 deletions
@@ -1,11 +1,11 @@
 """Structured logger utility for creating JSON logs.
 
-See the delphi_utils README.md for usage examples.
+To make our structured logging as useful as it can be, particularly within the context of how we use logs in Elastic,
+the `event` argument (typically the first unnamed arg) should be a static string (to make filtering easier),
+and each dynamic/varying value should be specified in an individual meaningfully- and consistently-named argument
+to the logger call (for use in filtering, thresholding, grouping, visualization, etc)
 
-The Delphi group uses two ~identical versions of this file.
-Try to keep them in sync with edits, for sanity.
-https://github.com/cmu-delphi/covidcast-indicators/blob/main/_delphi_utils_python/delphi_utils/logger.py
-https://github.com/cmu-delphi/delphi-epidata/blob/dev/src/common/logger.py
+See the delphi_utils README.md for usage examples.
 """
 
 import contextlib
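
As a concrete contrast for the docstring's advice (a sketch, not code from this commit; the indicator name and version are made up):

```py
from delphi_utils.logger import get_structured_logger

logger = get_structured_logger("example")

# Anti-pattern: interpolating values into the event string makes every log
# line unique, which defeats filtering and grouping in Elastic.
# logger.info(f"Started my_indicator with covidcast-indicators version 0.3.25")

# Preferred: a static event string with individually named dynamic values.
logger.info(
    "Started a covidcast-indicator",
    indicator_name="my_indicator",   # hypothetical values
    current_version="0.3.25",
)
```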

_delphi_utils_python/delphi_utils/runner.py

Lines changed: 14 additions & 5 deletions
@@ -51,6 +51,7 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
 
     #Get version and indicator name for startup
     ind_name = indicator_fn.__module__.replace(".run", "")
+
     #Check for version.cfg in indicator directory
     if os.path.exists("version.cfg"):
         with open("version.cfg") as ver_file:
@@ -59,9 +60,15 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
                 if "current_version" in line:
                     current_version = str.strip(line)
                     current_version = current_version.replace("current_version = ", "")
-        #Logging - Starting Indicator
-        logger.info(f"Started {ind_name} with covidcast-indicators version {current_version}")
-    else: logger.info(f"Started {ind_name} without version.cfg")
+        logger.info(
+            "Started a covidcast-indicator",
+            indicator_name=ind_name,
+            current_version=current_version,
+        )
+    else:
+        logger.info(
+            "Started a covidcast-indicator without version.cfg", indicator_name=ind_name
+        )
 
     indicator_fn(params)
     validator = validator_fn(params)
@@ -77,8 +84,10 @@ def run_indicator_pipeline(indicator_fn: Callable[[Params], None],
             break
         time.sleep(1)
     else:
-        logger.error(f"Flash step timed out ({timer} s), terminating",
-                     elapsed_time_in_seconds = round(time.time() - start, 2))
+        logger.error(
+            "Flash step timed out, terminating",
+            elapsed_time_in_seconds=round(time.time() - start, 2),
+        )
     t1.terminate()
     t1.join()
     if validator:
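
For context, the parsing loop above expects `version.cfg` to contain a `current_version = ...` line; a minimal sketch of what it extracts (the version number is hypothetical):

```py
# Sketch of the version.cfg parsing shown above, with a hypothetical line.
line = "current_version = 0.3.25\n"
if "current_version" in line:
    current_version = str.strip(line)
    current_version = current_version.replace("current_version = ", "")
assert current_version == "0.3.25"
```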

_delphi_utils_python/tests/test_export.py

Lines changed: 6 additions & 4 deletions
@@ -323,12 +323,13 @@ def test_export_df_with_missingness(self, tmp_path):
 
     @mock.patch("delphi_utils.logger")
     def test_export_df_with_contradictory_missingness(self, mock_logger, tmp_path):
-
+        sensor = "test"
+        geo_res = "state"
         create_export_csv(
             df=self.DF3.copy(),
             export_dir=tmp_path,
-            geo_res="state",
-            sensor="test",
+            sensor=sensor,
+            geo_res=geo_res,
             logger=mock_logger
         )
         assert set(listdir(tmp_path)) == set(
@@ -339,8 +340,9 @@ def test_export_df_with_contradictory_missingness(self, mock_logger, tmp_path):
             ]
         )
         assert pd.read_csv(join(tmp_path, "20200315_state_test.csv")).size > 0
+        date_str = datetime.strftime(self.TIMES[0], "%Y-%m-%d")
         mock_logger.info.assert_called_once_with(
-            "Filtering contradictory missing code in test_None_2020-02-15."
+            "Filtering contradictory missing code", sensor=sensor, metric=None, date=date_str
         )
 
     def test_export_sort(self, tmp_path):
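
The updated assertion illustrates the general pattern for testing structured log calls: mock the logger, then assert on the static event string plus each keyword argument. A self-contained sketch (the function and values are hypothetical):

```py
from unittest import mock

def do_filtering(logger):
    """Hypothetical stand-in for code that emits one structured log event."""
    logger.info("Filtering contradictory missing code",
                sensor="test", metric=None, date="2020-02-15")

mock_logger = mock.MagicMock()
do_filtering(mock_logger)
# Assert on the static event string and each named argument separately.
mock_logger.info.assert_called_once_with(
    "Filtering contradictory missing code",
    sensor="test", metric=None, date="2020-02-15",
)
```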

changehc/delphi_changehc/run.py

Lines changed: 18 additions & 16 deletions
@@ -25,7 +25,7 @@ def retrieve_files(params, filedate, logger):
     if files["denom"] is None:
 
         ## download recent files from FTP server
-        logger.info("downloading recent files through SFTP")
+        logger.info("Downloading recent files through SFTP")
         download_counts(filedate, params["indicator"]["input_cache_dir"], params["indicator"]["ftp_conn"])
 
     denom_file = "%s/%s_Counts_Products_Denom.dat.gz" % (params["indicator"]["input_cache_dir"],filedate)
@@ -157,28 +157,30 @@ def run_module(params: Dict[str, Dict[str, Any]]):
 
     startdate, enddate = process_dates(params, startdate_dt, enddate_dt)
 
-    logger.info("generating signal and exporting to CSV",
-                first_sensor_date = startdate,
-                last_sensor_date = enddate,
-                drop_date = dropdate,
-                n_backfill_days = n_backfill_days,
-                n_waiting_days = n_waiting_days,
-                geos = params["indicator"]["geos"],
-                export_dir = params["common"]["export_dir"],
-                parallel = params["indicator"]["parallel"],
-                weekday = params["indicator"]["weekday"],
-                types = params["indicator"]["types"],
-                se = params["indicator"]["se"])
+    logger.info(
+        "Generating signal and exporting to CSV",
+        first_sensor_date=startdate,
+        last_sensor_date=enddate,
+        drop_date=dropdate,
+        n_backfill_days=n_backfill_days,
+        n_waiting_days=n_waiting_days,
+        geos=params["indicator"]["geos"],
+        export_dir=params["common"]["export_dir"],
+        parallel=params["indicator"]["parallel"],
+        weekday=params["indicator"]["weekday"],
+        types=params["indicator"]["types"],
+        se=params["indicator"]["se"],
+    )
 
     ## start generating
     stats = []
     for geo in params["indicator"]["geos"]:
         for numtype in params["indicator"]["types"]:
             for weekday in params["indicator"]["weekday"]:
                 if weekday:
-                    logger.info("starting weekday adj", geo = geo, numtype = numtype)
+                    logger.info("Starting weekday adj", geo_type=geo, numtype=numtype)
                 else:
-                    logger.info("starting no adj", geo = geo, numtype = numtype)
+                    logger.info("Starting no adj", geo_type=geo, numtype=numtype)
                 su_inst = CHCSensorUpdater(
                     startdate,
                     enddate,
@@ -211,7 +213,7 @@ def run_module(params: Dict[str, Dict[str, Any]]):
             )
             stats.extend(more_stats)
 
-        logger.info("finished processing", geo = geo)
+        logger.info("Finished processing", geo_type=geo)
 
     elapsed_time_in_seconds = round(time.time() - start_time, 2)
     min_max_date = stats and min(s[0] for s in stats)

changehc/delphi_changehc/sensor.py

Lines changed: 7 additions & 7 deletions
@@ -118,10 +118,10 @@ def fit(y_data, first_sensor_date, geo_id, logger, num_col="num", den_col="den")
         se_valid = valid_rates.eval('sqrt(rate * (1 - rate) / den)')
         rate_data['se'] = se_valid
 
-        logger.debug("{0}: {1:.3f},[{2:.3f}]".format(
-            geo_id, rate_data['rate'][-1], rate_data['se'][-1]
-        ))
-        return {"geo_id": geo_id,
-                "rate": 100 * rate_data['rate'],
-                "se": 100 * rate_data['se'],
-                "incl": include}
+        logger.debug(
+            ".fit() DEBUG - last rate/se for geo",
+            geo_value=geo_id,
+            value=rate_data["rate"][-1],
+            se=rate_data["se"][-1],
+        )
+        return {"geo_id": geo_id, "rate": 100 * rate_data["rate"], "se": 100 * rate_data["se"], "incl": include}

changehc/delphi_changehc/update_sensor.py

Lines changed: 8 additions & 11 deletions
@@ -41,17 +41,15 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, logger, output_pa
     assert df[suspicious_se_mask].empty, " se contains suspiciously large values"
     assert not df["se"].isna().any(), " se contains nan values"
     if write_se:
-        logger.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name))
+        logger.info("WARNING: WRITING SEs", filename=out_name)
     else:
         df["se"] = np.nan
 
     assert not df["val"].isna().any(), " val contains nan values"
     suspicious_val_mask = df["val"].gt(90)
     if not df[suspicious_val_mask].empty:
         for geo in df.loc[suspicious_val_mask, "geo_id"]:
-            logger.warning("value suspiciously high, {0}: {1}".format(
-                geo, out_name
-            ))
+            logger.warning("Value suspiciously high", geo_value=geo, filename=out_name)
 
     dates = create_export_csv(
         df,
@@ -62,10 +60,8 @@ def write_to_csv(df, geo_level, write_se, day_shift, out_name, logger, output_pa
         sensor=out_name,
         write_empty_days=True
     )
-    logger.debug("wrote {0} rows for {1} {2}".format(
-        df.size, df["geo_id"].unique().size, geo_level
-    ))
-    logger.debug("wrote files to {0}".format(output_path))
+    logger.debug("Wrote rows", num_rows=df.size, geo_type=geo_level, num_geo_ids=df["geo_id"].unique().size)
+    logger.debug("Wrote files", export_dir=output_path)
     return dates
 
 
@@ -148,8 +144,9 @@ def geo_reindex(self, data):
         geo = self.geo
         gmpr = GeoMapper()
         if geo not in {"county", "state", "msa", "hrr", "nation", "hhs"}:
-            self.logger.error("{0} is invalid, pick one of 'county', "
-                              "'state', 'msa', 'hrr', 'hss','nation'".format(geo))
+            self.logger.error(
+                "Geo is invalid, pick one of 'county', " "'state', 'msa', 'hrr', 'hss','nation'", geo_type=geo
+            )
             return False
         if geo == "county":
             data_frame = gmpr.fips_to_megacounty(data,
@@ -224,7 +221,7 @@ def update_sensor(self,
                 dfs.append(res)
         else:
             n_cpu = min(10, cpu_count())
-            self.logger.debug("starting pool with {0} workers".format(n_cpu))
+            self.logger.debug("Starting pool", n_workers=n_cpu)
             with Pool(n_cpu) as pool:
                 pool_results = []
                 for geo_id, sub_data in data_frame.groupby(level=0,as_index=False):

changehc/tests/test_update_sensor.py

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@
 import pandas as pd
 import numpy as np
 from boto3 import Session
+from delphi_utils import get_structured_logger
 from moto import mock_s3
 import pytest
 
@@ -28,7 +29,7 @@
 DENOM_FILEPATH = PARAMS["indicator"]["input_denom_file"]
 DROP_DATE = pd.to_datetime(PARAMS["indicator"]["drop_date"])
 OUTPATH="test_data/"
-TEST_LOGGER = logging.getLogger()
+TEST_LOGGER = get_structured_logger()
 
 class TestCHCSensorUpdater:
     """Tests for updating the sensors."""
