From e1d0119ea89a3aa227a27941d0c66e81ef17d581 Mon Sep 17 00:00:00 2001 From: Jessica Oraegbu Date: Mon, 20 Oct 2025 17:41:25 -0700 Subject: [PATCH 1/7] DOC: Add floating point precision on writing/reading to csv (#13159) guidance --- doc/source/user_guide/io.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 5b25462568cfa..aab1e6374a1cf 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1671,6 +1671,42 @@ function takes a number of arguments. Only the first is required. * ``chunksize``: Number of rows to write at a time * ``date_format``: Format string for datetime objects +Floating Point Precision on Writing and Reading to CSV Files ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Floating Point Precision inaccuracies when writing and reading to CSV files happen due to how the numeric data is represented and parsed in pandas. +During the write process, pandas converts all the numeric values into text that is stored as bytes in the CSV file. However, when we read the CSV back, pandas parses those +text values and converts them back into different types (floats, integers, strings) which is when the loss of floating point precision happens. +The conversion process is not always guaranteed to be accurate because small differences in data representation between original and reloaded data frame can occur leading to precision loss. + +* ``float_format``: Format string for floating point numbers +``df.to_csv('file.csv', float_format='%.17g')`` allows for floating point precision to be specified when writing to the CSV file. In this example, this ensures that the floating point is written in this exact format of 17 significant digits (64-bit float). + +``df = pd.read_csv('file.csv', float_precision='round_trip')`` allows for floating point precision to be specified when reading from the CSV file. 
This is guaranteed to round-trip values after writing to a file and Pandas will read the numbers without losing or changing decimal places. + +.. ipython:: python + import pandas as pd + import os + + x0 = 18292498239.824 + df1 = pd.DataFrame({'One': [x0]}, index=["bignum"]) + + df1.to_csv('test.csv', float_format='%.17g') + df2 = pd.read_csv('test.csv', index_col=0, float_precision='round_trip') + + x1 = df1['One'][0] + x2 = df2['One'][0] + + print("x0 = %f; x1 = %f; Are they equal? %s" % (x0, x1, (x0 == x1))) + print("x0 = %f; x2 = %f; Are they equal? %s" % (x0, x2, (x0 == x2))) + + os.remove('test.csv') + + .. Output received: + x0 = 18292498239.824001; x1 = 18292498239.824001; Are they equal? True + x0 = 18292498239.824001; x2 = 18292498239.824001; Are they equal? True + .. + Writing a formatted string ++++++++++++++++++++++++++ From a5410c1f7d28fd091e5a69491eb5ab899e414ddb Mon Sep 17 00:00:00 2001 From: Jessica Oraegbu Date: Mon, 20 Oct 2025 18:03:49 -0700 Subject: [PATCH 2/7] Fixing format in floating point precision docs --- doc/source/user_guide/io.rst | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index aab1e6374a1cf..be71cfde2480a 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1672,7 +1672,7 @@ function takes a number of arguments. Only the first is required. * ``date_format``: Format string for datetime objects Floating Point Precision on Writing and Reading to CSV Files -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Floating Point Precision inaccuracies when writing and reading to CSV files happen due to how the numeric data is represented and parsed in pandas. During the write process, pandas converts all the numeric values into text that is stored as bytes in the CSV file. 
However, when we read the CSV back, pandas parses those @@ -1680,11 +1680,13 @@ text values and converts them back into different types (floats, integers, strin The conversion process is not always guaranteed to be accurate because small differences in data representation between original and reloaded data frame can occur leading to precision loss. * ``float_format``: Format string for floating point numbers + ``df.to_csv('file.csv', float_format='%.17g')`` allows for floating point precision to be specified when writing to the CSV file. In this example, this ensures that the floating point is written in this exact format of 17 significant digits (64-bit float). ``df = pd.read_csv('file.csv', float_precision='round_trip')`` allows for floating point precision to be specified when reading from the CSV file. This is guaranteed to round-trip values after writing to a file and Pandas will read the numbers without losing or changing decimal places. .. ipython:: python + import pandas as pd import os @@ -1702,11 +1704,6 @@ The conversion process is not always guaranteed to be accurate because small dif os.remove('test.csv') - .. Output received: - x0 = 18292498239.824001; x1 = 18292498239.824001; Are they equal? True - x0 = 18292498239.824001; x2 = 18292498239.824001; Are they equal? True - .. 
- Writing a formatted string ++++++++++++++++++++++++++ From 28fb8dd0cce5d928bddb04d59451b33a0bc3ef84 Mon Sep 17 00:00:00 2001 From: jessica-oraegbu Date: Wed, 29 Oct 2025 08:23:49 -0700 Subject: [PATCH 3/7] Update doc/source/user_guide/io.rst Co-authored-by: William Ayd --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index be71cfde2480a..e762265e9ce5b 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1696,7 +1696,7 @@ The conversion process is not always guaranteed to be accurate because small dif df1.to_csv('test.csv', float_format='%.17g') df2 = pd.read_csv('test.csv', index_col=0, float_precision='round_trip') - x1 = df1['One'][0] + x1 = df1.iloc[0, 0] x2 = df2['One'][0] print("x0 = %f; x1 = %f; Are they equal? %s" % (x0, x1, (x0 == x1))) From 70403750dd3066ceb8b734ab861c807589c27341 Mon Sep 17 00:00:00 2001 From: jessica-oraegbu Date: Wed, 29 Oct 2025 08:24:10 -0700 Subject: [PATCH 4/7] Update doc/source/user_guide/io.rst Co-authored-by: William Ayd --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e762265e9ce5b..639fd05d184c9 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1699,7 +1699,7 @@ The conversion process is not always guaranteed to be accurate because small dif x1 = df1.iloc[0, 0] x2 = df2['One'][0] - print("x0 = %f; x1 = %f; Are they equal? %s" % (x0, x1, (x0 == x1))) + print(f"x0 = {x0}; x1 = {x1}; Are they equal? {x0 == x1}") print("x0 = %f; x2 = %f; Are they equal? 
%s" % (x0, x2, (x0 == x2))) os.remove('test.csv') From 92dd1da49532955e07368c2c1b1778e50b783641 Mon Sep 17 00:00:00 2001 From: Jessica Oraegbu Date: Wed, 29 Oct 2025 08:33:07 -0700 Subject: [PATCH 5/7] Fixing format in floating point precision docs --- doc/source/user_guide/io.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9b9558543c439..f1b37fbfc056e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1700,7 +1700,8 @@ The conversion process is not always guaranteed to be accurate because small dif x2 = df2['One'][0] print(f"x0 = {x0}; x1 = {x1}; Are they equal? {x0 == x1}") - print("x0 = %f; x2 = %f; Are they equal? %s" % (x0, x2, (x0 == x2))) + print(f"x0 = {x0}; x2 = {x2}; Are they equal? {x0 == x2}") + os.remove('test.csv') From 91fd754810d243726ec46fe360410588c3f9a73b Mon Sep 17 00:00:00 2001 From: Jessica Oraegbu Date: Wed, 29 Oct 2025 09:32:48 -0700 Subject: [PATCH 6/7] Fixing format in floating point precision docs --- doc/source/user_guide/io.rst | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index f1b37fbfc056e..5c9418da62fd2 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1686,25 +1686,21 @@ The conversion process is not always guaranteed to be accurate because small dif ``df = pd.read_csv('file.csv', float_precision='round_trip')`` allows for floating point precision to be specified when reading from the CSV file. This is guaranteed to round-trip values after writing to a file and Pandas will read the numbers without losing or changing decimal places. .. 
ipython:: python - + from import StringIO import pandas as pd - import os x0 = 18292498239.824 df1 = pd.DataFrame({'One': [x0]}, index=["bignum"]) - df1.to_csv('test.csv', float_format='%.17g') - df2 = pd.read_csv('test.csv', index_col=0, float_precision='round_trip') + csv_string = df1.to_csv(float_format='%.17g') + df2 = pd.read_csv(StringIO(csv_string), index_col=0, float_precision='round_trip') x1 = df1.iloc[0, 0] - x2 = df2['One'][0] + x2 = df2.iloc[0, 0] print(f"x0 = {x0}; x1 = {x1}; Are they equal? {x0 == x1}") print(f"x0 = {x0}; x2 = {x2}; Are they equal? {x0 == x2}") - - os.remove('test.csv') - Writing a formatted string ++++++++++++++++++++++++++ From 52a57265523e6f716c5164868e6557d2f93c0c90 Mon Sep 17 00:00:00 2001 From: Jessica Oraegbu Date: Wed, 29 Oct 2025 10:02:13 -0700 Subject: [PATCH 7/7] Fixing format in floating point precision docs --- doc/source/user_guide/io.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 5c9418da62fd2..a25d8ba39c431 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1672,7 +1672,7 @@ function takes a number of arguments. Only the first is required. * ``date_format``: Format string for datetime objects Floating Point Precision on Writing and Reading to CSV Files -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Floating Point Precision inaccuracies when writing and reading to CSV files happen due to how the numeric data is represented and parsed in pandas. During the write process, pandas converts all the numeric values into text that is stored as bytes in the CSV file. 
However, when we read the CSV back, pandas parses those @@ -1686,20 +1686,20 @@ The conversion process is not always guaranteed to be accurate because small dif ``df = pd.read_csv('file.csv', float_precision='round_trip')`` allows for floating point precision to be specified when reading from the CSV file. This is guaranteed to round-trip values after writing to a file and Pandas will read the numbers without losing or changing decimal places. .. ipython:: python - from import StringIO - import pandas as pd - x0 = 18292498239.824 - df1 = pd.DataFrame({'One': [x0]}, index=["bignum"]) + from io import StringIO + + x0 = 18292498239.824 + df1 = pd.DataFrame({'One': [x0]}, index=["bignum"]) - csv_string = df1.to_csv(float_format='%.17g') - df2 = pd.read_csv(StringIO(csv_string), index_col=0, float_precision='round_trip') + csv_string = df1.to_csv(float_format='%.17g') + df2 = pd.read_csv(StringIO(csv_string), index_col=0, float_precision='round_trip') - x1 = df1.iloc[0, 0] - x2 = df2.iloc[0, 0] + x1 = df1.iloc[0, 0] + x2 = df2.iloc[0, 0] - print(f"x0 = {x0}; x1 = {x1}; Are they equal? {x0 == x1}") - print(f"x0 = {x0}; x2 = {x2}; Are they equal? {x0 == x2}") + print(f"x0 = {x0}; x1 = {x1}; Are they equal? {x0 == x1}") + print(f"x0 = {x0}; x2 = {x2}; Are they equal? {x0 == x2}") Writing a formatted string ++++++++++++++++++++++++++