In a previous post we used Sweetviz to summarize a single CSV file, and ydata-profiling to compare two different CSV files. Sweetviz can also be used to generate an HTML report that compares two CSV files.
The code below is a Python script that generates a Sweetviz report comparing two CSV files.
Usage:
uv run csv_compare_sweetviz.py file1.csv file2.csv

# /// script
# requires-python = "==3.12"
# dependencies = [
# "sweetviz",
# "pandas",
# "click",
# "setuptools<81",
# "pathlib",
# "numpy<2",
# ]
# ///
import sweetviz as sv
import pandas as pd
import click
from pathlib import Path
from datetime import datetime
@click.command()
@click.argument('original_file', type=click.Path(exists=True))
@click.argument('new_file', type=click.Path(exists=True))
@click.option(
    "--save_location",
    default="/Users/marquin/Documents/sweetviz_summaries",
    help="Folder to save the summary report.",
)
def create_summary_report(original_file, new_file, save_location):
    """
    Save a Sweetviz HTML report comparing two CSV files.

    The report is written to the save location as
    "<timestamp>-<original file stem>.html".
    \f
    Everything after the form feed above is hidden from ``--help``.

    Args:
        original_file: Path to the baseline CSV file.
        new_file: Path to the CSV file compared against the baseline.
        save_location: Folder the HTML report is written to; it is created
            (including parents) if it does not exist.

    Raises:
        ValueError: If either input path does not end in ".csv"
            (checked case-insensitively).
    """
    # Validate both inputs before touching the file system, so a rejected
    # run does not leave an empty output folder behind.
    for csv_file in (original_file, new_file):
        if not str(csv_file).lower().endswith(".csv"):
            raise ValueError(
                f"The file path must refer to a CSV file: {csv_file}"
            )

    # The file stems double as the dataset labels shown in the report.
    original_name = Path(original_file).stem
    new_name = Path(new_file).stem

    # Create the output folder if it does not exist.
    output_folder = Path(save_location)
    output_folder.mkdir(parents=True, exist_ok=True)

    # A timestamp prefix keeps repeated runs from overwriting each other.
    dt_string = datetime.now().strftime("%Y%m%d%H%M%S")
    report_path = output_folder / f"{dt_string}-{original_name}.html"

    # Read both CSV files into dataframes.
    original_df = pd.read_csv(original_file)
    new_df = pd.read_csv(new_file)

    # Build the comparison; each entry is a [dataframe, display name] pair.
    report = sv.compare([original_df, original_name], [new_df, new_name])

    # Save the report as HTML; show_html is called with its defaults, as before.
    report.show_html(str(report_path))
if __name__ == "__main__":
    # Called without arguments: the Click command reads them from the
    # command line when the file is run as a script.
    create_summary_report()