cmu-delphi
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/build-container-images.yml b/.github/workflows/build-container-images.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml
Lines changed: 6 additions & 3 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 1 deletion
diff --git a/_delphi_utils_python/.bumpversion.cfg b/_delphi_utils_python/.bumpversion.cfg
Lines changed: 1 addition & 1 deletion
diff --git a/_delphi_utils_python/delphi_utils/__init__.py b/_delphi_utils_python/delphi_utils/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/_delphi_utils_python/delphi_utils/archive.py b/_delphi_utils_python/delphi_utils/archive.py
Lines changed: 25 additions & 8 deletions
diff --git a/_delphi_utils_python/delphi_utils/export.py b/_delphi_utils_python/delphi_utils/export.py
Lines changed: 49 additions & 4 deletions
diff --git a/_delphi_utils_python/setup.py b/_delphi_utils_python/setup.py
Lines changed: 3 additions & 2 deletions
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.16
+current_version = 0.1.17
 commit = True
 message = chore: bump covidcast-indicators to {new_version}
 tag = False
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        packages: [ "" ]
+        packages: [ facebook ]
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
 
@@ -48,12 +48,15 @@ jobs:
             ${{ runner.os }}-r-facebook-survey-
       - name: Install R dependencies
         run: |
-          if ( packageVersion("readr") != "1.4.0" ) {
+          if ( !require("readr") || packageVersion("readr") != "1.4.0" ) {
             install.packages("devtools")
             devtools::install_version("readr", version = "1.4.0")
           }
-          install.packages("remotes")
-          remotes::update_packages(c("rcmdcheck", "mockr"), upgrade="always")
+
+          if ( !require("remotes") ) {
+            install.packages("remotes")
+          }
+          remotes::update_packages(c("rcmdcheck", "mockr", "remotes"), upgrade="always")
           dependency_list <- remotes::dev_package_deps(dependencies=TRUE)
           remotes::update_packages(dependency_list$package[dependency_list$package != "readr"], upgrade="always")
         shell: Rscript {0}
 
@@ -4,7 +4,7 @@
 params.json
 
 # Do not commit output files
-receiving/*.csv
+**/receiving/*.csv
 
 # Do not commit hidden macOS files
 .DS_Store
 
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.12
+current_version = 0.1.13
 commit = True
 message = chore: bump delphi_utils to {new_version}
 tag = False
 
@@ -14,4 +14,4 @@
 from .signal import add_prefix
 from .nancodes import Nans
 
-__version__ = "0.1.12"
+__version__ = "0.1.13"
@@ -40,9 +40,11 @@
 from git import Repo
 from git.refs.head import Head
 import pandas as pd
+import numpy as np
 
 from .utils import read_params
 from .logger import get_structured_logger
+from .nancodes import Nans
 
 Files = List[str]
 FileDiffMap = Dict[str, Optional[str]]
@@ -73,8 +75,10 @@ def diff_export_csv(
         changed_df is the pd.DataFrame of common rows from after_csv with changed values.
         added_df is the pd.DataFrame of added rows from after_csv.
     """
-    export_csv_dtypes = {"geo_id": str, "val": float,
-                         "se": float, "sample_size": float}
+    export_csv_dtypes = {
+        "geo_id": str, "val": float, "se": float, "sample_size": float,
+        "missing_val": int, "missing_se": int, "missing_sample_size": int
+    }
 
     before_df = pd.read_csv(before_csv, dtype=export_csv_dtypes)
     before_df.set_index("geo_id", inplace=True)
@@ -89,12 +93,22 @@ def diff_export_csv(
     before_df_cmn = before_df.reindex(common_idx)
     after_df_cmn = after_df.reindex(common_idx)
 
-    # Exact comparisons, treating NA == NA as True
-    same_mask = before_df_cmn == after_df_cmn
-    same_mask |= pd.isna(before_df_cmn) & pd.isna(after_df_cmn)
+    # If CSVs have different columns (no missingness), mark all values as new
+    if ("missing_val" in before_df_cmn.columns) ^ ("missing_val" in after_df_cmn.columns):
+        same_mask = after_df_cmn.copy()
+        same_mask.loc[:] = False
+    else:
+        # Exact comparisons, treating NA == NA as True
+        same_mask = before_df_cmn == after_df_cmn
+        same_mask |= pd.isna(before_df_cmn) & pd.isna(after_df_cmn)
+
+    # Code deleted entries as nans with the deleted missing code
+    deleted_df = before_df.loc[deleted_idx, :].copy()
+    deleted_df[["val", "se", "sample_size"]] = np.nan
+    deleted_df[["missing_val", "missing_se", "missing_sample_size"]] = Nans.DELETED
 
     return (
-        before_df.loc[deleted_idx, :],
+        deleted_df,
         after_df_cmn.loc[~(same_mask.all(axis=1)), :],
         after_df.loc[added_idx, :])
 
@@ -227,11 +241,11 @@ def diff_exports(self) -> Tuple[Files, FileDiffMap, Files]:
 
             deleted_df, changed_df, added_df = diff_export_csv(
                 before_file, after_file)
-            new_issues_df = pd.concat([changed_df, added_df], axis=0)
+            new_issues_df = pd.concat([deleted_df, changed_df, added_df], axis=0)
 
             if len(deleted_df) > 0:
                 print(
-                    f"Warning, diff has deleted indices in {after_file} that will be ignored")
+                    f"Diff has deleted indices in {after_file} that have been coded as nans.")
 
             # Write the diffs to diff_file, if applicable
             if len(new_issues_df) > 0:
@@ -414,6 +428,9 @@ def archive_exports(self,  # pylint: disable=arguments-differ
                 archive_success.append(exported_file)
             except FileNotFoundError:
                 archive_fail.append(exported_file)
+            except shutil.SameFileError:
+                # no need to copy if the cached file is the same
+                archive_success.append(exported_file)
 
         self._exports_archived = True
 
 
@@ -3,10 +3,33 @@
 from datetime import datetime
 from os.path import join
 from typing import Optional
+import logging
 
+from epiweeks import Week
 import numpy as np
 import pandas as pd
 
+from .nancodes import Nans
+
+def filter_contradicting_missing_codes(df, sensor, metric, date, logger=None):
+    """Drop rows whose value and missingness code disagree, logging once if any exist.
+
+    For val, se and sample_size, a row is contradictory when the value is NaN but its
+    "missing_" code equals Nans.NOT_MISSING, or the value is present with another code.
+    """
+    # Fold the per-column checks into one mask built on the unfiltered frame, so no
+    # mask is ever applied to an already-shortened frame (that needed index alignment).
+    # The XNOR below is True when both tests agree, i.e. the pair is contradictory.
+    contradictory = pd.Series(False, index=df.index)
+    for column in ["val", "se", "sample_size"]:
+        contradictory |= ~(df[column].isna() ^ df["missing_" + column].eq(Nans.NOT_MISSING))
+    if not contradictory.any():
+        return df
+    if logger is not None:
+        logger.info("Filtering contradictory missing code in " +
+                    "{0}_{1}_{2}.".format(sensor, metric, date.strftime("%Y-%m-%d")))
+    return df.loc[~contradictory]
+
 def create_export_csv(
     df: pd.DataFrame,
     export_dir: str,
@@ -16,7 +39,9 @@ def create_export_csv(
     start_date: Optional[datetime] = None,
     end_date: Optional[datetime] = None,
     remove_null_samples: Optional[bool] = False,
-    write_empty_days: Optional[bool] = False
+    write_empty_days: Optional[bool] = False,
+    logger: Optional[logging.Logger] = None,
+    weekly_dates = False,
 ):
     """Export data in the format expected by the Delphi API.
 
@@ -43,6 +68,8 @@ def create_export_csv(
     write_empty_days: Optional[bool]
         If true, every day in between start_date and end_date will have a CSV file written
         even if there is no data for the day. If false, only the days present are written.
+    logger: Optional[logging.Logger]
+        Pass a logger object here to log information about contradictory missing codes.
 
     Returns
     ---------
@@ -65,12 +92,30 @@ def create_export_csv(
         dates = pd.date_range(start_date, end_date)
 
     for date in dates:
+        if weekly_dates:
+            t = Week.fromdate(pd.to_datetime(str(date)))
+            date_str = "weekly_" + str(t.year) + str(t.week).zfill(2)
+        else:
+            date_str = date.strftime('%Y%m%d')
         if metric is None:
-            export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{sensor}.csv"
+            export_filename = f"{date_str}_{geo_res}_{sensor}.csv"
         else:
-            export_filename = f"{date.strftime('%Y%m%d')}_{geo_res}_{metric}_{sensor}.csv"
+            export_filename = f"{date_str}_{geo_res}_{metric}_{sensor}.csv"
         export_file = join(export_dir, export_filename)
-        export_df = df[df["timestamp"] == date][["geo_id", "val", "se", "sample_size",]]
+        expected_columns = [
+            "geo_id",
+            "val",
+            "se",
+            "sample_size",
+            "missing_val",
+            "missing_se",
+            "missing_sample_size"
+        ]
+        export_df = df[df["timestamp"] == date].filter(items=expected_columns)
+        if "missing_val" in export_df.columns:
+            export_df = filter_contradicting_missing_codes(
+                export_df, sensor, metric, date, logger=logger
+            )
         if remove_null_samples:
             export_df = export_df[export_df["sample_size"].notnull()]
         export_df = export_df.round({"val": 7, "se": 7})
 
@@ -7,6 +7,7 @@
 required = [
     "boto3",
     "covidcast",
+    "epiweeks",
     "freezegun",
     "gitpython",
     "mock",
@@ -24,7 +25,7 @@
 
 setup(
     name="delphi_utils",
-    version="0.1.12",
+    version="0.1.13",
     description="Shared Utility Functions for Indicators",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -35,7 +36,7 @@
     classifiers=[
         "Development Status :: 5 - Production/Stable",
         "Intended Audience :: Developers",
-        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
     ],
     packages=find_packages(),
     package_data={'': ['data/*.csv']}