Skip to content

Commit 4e3ac82

Browse files
Jingjing Tang
authored and committed
fixed errors in filling missing values with 0s in raw
1 parent db459b2 commit 4e3ac82

File tree

2 files changed

+9
-10
lines changed

2 files changed

+9
-10
lines changed

google_symptoms/delphi_google_symptoms/pull.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,12 @@
11
# -*- coding: utf-8 -*-
22
import re
3+
from datetime import timedelta
34

45
import numpy as np
56
import pandas as pd
67

78
from .constants import STATE_TO_ABBREV
89

9-
REPLACE_FIPS = [
10-
("46102", "46113"),
11-
]
12-
1310
def get_geo_id(region_code):
1411
"""
1512
There are region code in the format of "US-state" and "US-state-fips". In
@@ -95,17 +92,16 @@ def pull_gs_data(base_url, metrics, level):
9592
"schema may have changed. Please investigate."
9693
)
9794

98-
# Let each FIPS/state has same number of rows
95+
# Make sure each FIPS/state has same number of rows
9996
geo_list = df["geo_id"].unique()
100-
date_list = pd.date_range(start=df["date"].min(),
97+
date_list = pd.date_range(start=df["date"].min()-timedelta(days=7),
10198
end=df["date"].max(),
10299
freq='D')
103100
index_df = pd.MultiIndex.from_product(
104101
[geo_list, date_list], names=['geo_id', 'date']
105102
)
106-
df = df.groupby(
107-
["geo_id", "date"]
108-
).sum().reindex(
103+
df = df.set_index(["geo_id", "date"]
104+
).reindex(
109105
index_df
110106
).reset_index(
111107
).rename({"date": "timestamp"}, axis = 1)

google_symptoms/delphi_google_symptoms/run.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,12 @@ def run_module():
2222
base_url = params["base_url"]
2323

2424
for geo_res in GEO_RESOLUTIONS:
25-
df = pull_gs_data(base_url, METRICS, geo_res)
25+
df_pull = pull_gs_data(base_url, METRICS, geo_res)
2626
for metric, smoother in product(METRICS, SMOOTHERS):
2727
print(geo_res, metric, smoother)
28+
df = df_pull.copy()
29+
if smoother == "smoothed":
30+
df = df.fillna(0)
2831
df["val"] = SMOOTHERS_MAP[smoother][0](df["symptom:"+metric].values)
2932
df["se"] = np.nan
3033
df["sample_size"] = np.nan

0 commit comments

Comments
 (0)