Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 58 additions & 43 deletions col_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
import io

st.title("🔍 Excel Comparator (Full Rows from Multi-Column Match)")
st.title("🔍 Excel Comparator — Side-by-Side Merged View")

file1 = st.file_uploader("📄 Upload First Excel File", type=["xlsx", "xls"], key="file1")
file2 = st.file_uploader("📄 Upload Second Excel File", type=["xlsx", "xls"], key="file2")
Expand All @@ -17,70 +17,85 @@
df1 = pd.read_excel(xls1, sheet_name=sheet1)
df2 = pd.read_excel(xls2, sheet_name=sheet2)

st.write("**File 1 Preview:**")
st.write("**Preview - File 1**")
st.dataframe(df1.head())

st.write("**File 2 Preview:**")
st.write("**Preview - File 2**")
st.dataframe(df2.head())

st.subheader("🔧 Select columns to match by")
cols1 = st.multiselect("File 1 columns", df1.columns, key="cols1")
cols2 = st.multiselect("File 2 columns", df2.columns, key="cols2")
st.subheader("🔧 Select columns to use as unique key")
cols1 = st.multiselect("File 1 key columns", df1.columns, key="cols1")
cols2 = st.multiselect("File 2 key columns", df2.columns, key="cols2")

if len(cols1) != len(cols2):
st.warning("⚠️ Please select the same number of columns from both files.")
st.warning("⚠️ Select same number of columns in both files")
elif cols1 and cols2 and st.button("🔍 Compare Now"):
# Create composite key in both dataframes
# Step 1: Create matching keys
df1["__key__"] = df1[cols1].astype(str).agg(" | ".join, axis=1)
df2["__key__"] = df2[cols2].astype(str).agg(" | ".join, axis=1)

# Identify match and mismatch sets
keys1 = set(df1["__key__"].dropna())
keys2 = set(df2["__key__"].dropna())
# Match keys
match_keys = set(df1["__key__"]).intersection(set(df2["__key__"]))
only1_keys = set(df1["__key__"]) - set(df2["__key__"])
only2_keys = set(df2["__key__"]) - set(df1["__key__"])

match_keys = keys1 & keys2
only1_keys = keys1 - keys2
only2_keys = keys2 - keys1

# Filter full rows
df_match1 = df1[df1["__key__"].isin(match_keys)].drop(columns="__key__")
df_match1 = df1[df1["__key__"].isin(match_keys)].copy()
df_match2 = df2[df2["__key__"].isin(match_keys)].copy()
df_only1 = df1[df1["__key__"].isin(only1_keys)].drop(columns="__key__")
df_match2 = df2[df2["__key__"].isin(match_keys)].drop(columns="__key__")
df_only2 = df2[df2["__key__"].isin(only2_keys)].drop(columns="__key__")

st.success(f"✅ Matches: {len(df_match1)} rows")
st.info(f"📁 Only in File 1: {len(df_only1)} rows")
st.info(f"📁 Only in File 2: {len(df_only2)} rows")
df_match1.set_index("__key__", inplace=True)
df_match2.set_index("__key__", inplace=True)

# Step 2: Merge on __key__
merged = pd.merge(df_match1.add_prefix("F1_"),
df_match2.add_prefix("F2_"),
left_index=True, right_index=True,
how="inner").reset_index().rename(columns={"__key__": "Match_Key"})

st.success(f"✅ Matches: {len(merged)}")
st.info(f"📁 Only in File 1: {len(df_only1)}")
st.info(f"📁 Only in File 2: {len(df_only2)}")

# Step 3: Show merged view
with st.expander("🎯 Merged View of Matched Rows (Side-by-Side)"):
st.dataframe(merged)

# Show expandable results
with st.expander("🎯 Matched Rows from File 1"):
st.dataframe(df_match1)
with st.expander("🎯 Matched Rows from File 2"):
st.dataframe(df_match2)
with st.expander("❌ Only in File 1"):
st.dataframe(df_only1)

with st.expander("❌ Only in File 2"):
st.dataframe(df_only2)

# Export as Excel
def create_excel_file():
output = io.BytesIO()
with pd.ExcelWriter(output, engine="openpyxl") as writer:
df_match1.to_excel(writer, sheet_name="Matched_File1", index=False)
df_match2.to_excel(writer, sheet_name="Matched_File2", index=False)
# Step 4: Optional highlighting (Differences)
def highlight_diff(row):
    """Return per-cell CSS styles flagging File 1 values that differ from File 2.

    Intended for ``Styler.apply(..., axis=1)``: receives one row of the merged
    frame as a Series and returns one CSS string per label in ``row.index``,
    in the same order.

    A cell is styled ``"background-color: red"`` when its label starts with
    ``"F1_"``, the matching ``"F2_"`` label exists in the row, and the two
    values differ. Every other cell (including the ``"F2_"`` cells themselves)
    gets the empty string.

    Args:
        row: A pandas Series whose index holds the merged column labels.

    Returns:
        list[str]: One CSS declaration (or ``""``) per entry of ``row.index``.
    """
    src_prefix, dst_prefix = "F1_", "F2_"
    highlight = "background-color: red"

    def _differs(left, right):
        # NaN != NaN is True (and pd.NA comparisons are not truth-testable),
        # so handle missing values explicitly: two empty cells are equal,
        # one empty cell against a real value is a difference.
        left_missing, right_missing = pd.isna(left), pd.isna(right)
        if left_missing or right_missing:
            return not (left_missing and right_missing)
        return left != right

    styles = []
    for col in row.index:
        style = ""
        # Non-string labels cannot carry the prefix; leave them unstyled.
        if isinstance(col, str) and col.startswith(src_prefix):
            # Swap only the leading prefix. str.replace would also rewrite
            # later occurrences (e.g. "F1_REF1_ID" -> "F2_REF2_ID") and the
            # counterpart column would never be found.
            counterpart = dst_prefix + col[len(src_prefix):]
            if counterpart in row.index and _differs(row[col], row[counterpart]):
                style = highlight
        styles.append(style)
    return styles

st.subheader("🎨 Highlighted Differences")
st.dataframe(merged.style.apply(highlight_diff, axis=1))

# Step 5: Download
def create_excel():
buffer = io.BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
merged.to_excel(writer, sheet_name="Matched_SideBySide", index=False)
df_only1.to_excel(writer, sheet_name="Only_in_File1", index=False)
df_only2.to_excel(writer, sheet_name="Only_in_File2", index=False)
output.seek(0)
return output
buffer.seek(0)
return buffer

excel_output = create_excel_file()
excel_output = create_excel()

st.download_button("⬇️ Download Results as Excel", excel_output,
file_name="comparison_full_rows.xlsx",
st.download_button("⬇️ Download All Results as Excel", excel_output,
file_name="comparison_merged.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

# Optional CSVs
st.download_button("⬇️ CSV: Only in File 1", df_only1.to_csv(index=False), "only_in_file1.csv", "text/csv")
st.download_button("⬇️ CSV: Only in File 2", df_only2.to_csv(index=False), "only_in_file2.csv", "text/csv")
st.download_button("⬇️ CSV: Matched Rows File 1", df_match1.to_csv(index=False), "matched_file1.csv", "text/csv")
st.download_button("⬇️ CSV: Matched Rows File 2", df_match2.to_csv(index=False), "matched_file2.csv", "text/csv")