From d9d35a25e5445d61389313138a57099217dd231f Mon Sep 17 00:00:00 2001 From: Iain Gillis Date: Thu, 14 Mar 2024 11:49:42 -0600 Subject: [PATCH] add capability to pass options to to_csv method On Python 3.10 and 3.11, an upstream bug in pandas causes a failure when serializing a dataframe to csv if the dataframe contains a null byte. This pull request leaves the default behaviour alone, but gives users the option to modify `to_csv` behaviour, including fixing that issue with the `escapechar` parameter. See https://github.com/pandas-dev/pandas/issues/47871 --- socrata/sources.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/socrata/sources.py b/socrata/sources.py index eb5bc67..2bcca02 100644 --- a/socrata/sources.py +++ b/socrata/sources.py @@ -445,6 +445,8 @@ def df(self, dataframe, **kwargs): max_retries (integer): Optional retry limit per chunk in the upload. Defaults to 5. backoff_seconds (integer): Optional amount of time to backoff upon a chunk upload failure. Defaults to 2. + + pd_to_csv_params (dict): Optional keyword arguments passed to pd.DataFrame.to_csv method. Defaults to {}. ``` Returns: @@ -458,9 +460,17 @@ def df(self, dataframe, **kwargs): df = pandas.read_csv('test/fixtures/simple.csv') upload = upload.df(df) ``` + ```python + import pandas + # assume test/fixtures/simple.csv contains the null byte \x00 + # see https://github.com/pandas-dev/pandas/issues/47871 + df = pandas.read_csv('test/fixtures/simple.csv') + upload = upload.df(df, pd_to_csv_params={"escapechar": "\\"}) + ``` """ s = io.StringIO() - dataframe.to_csv(s, index=False) + pd_to_csv_params = kwargs.pop("pd_to_csv_params", {}) + dataframe.to_csv(s, index=False, **pd_to_csv_params) return self._chunked_bytes(bytes(s.getvalue().encode()),"text/csv", **kwargs) def add_to_revision(self, uri, revision):