In a previous post we used Sweetviz to make a summary of a CSV file, and ydata-profiling to compare two different CSV files. Sweetviz can also be used to make an HTML report that compares two different CSV files.
The code below is a Python script that generates a report comparing two CSV files with Sweetviz.
Usage:
uv run csv_compare_sweetviz.py file1.csv file2.csv

# /// script
# requires-python = "==3.12.*"
# dependencies = [
#   "sweetviz",
#   "pandas",
#   "click",
#   "setuptools<81",
#   "pathlib",
#   "numpy<2",
# ]
# ///
 
import sweetviz as sv
import pandas as pd
import click
from pathlib import Path
from datetime import datetime
 
 
@click.command()
@click.argument("original_file", type=click.Path(exists=True, dir_okay=False))
@click.argument("new_file", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "--save_location",
    # "~" is expanded at run time so the default works for any user instead
    # of pointing at one specific home directory.
    default="~/Documents/sweetviz_summaries",
    show_default=True,
    help="Folder to save the comparison report.",
)
def create_summary_report(original_file, new_file, save_location):
    """
    Compare two CSV files and save the result as a Sweetviz HTML report.

    ORIGINAL_FILE and NEW_FILE are loaded with pandas and compared with
    Sweetviz; each dataset is labelled in the report with its file name
    (without the extension). The report is written to the save location
    (created if it does not exist) as "<timestamp>-<original file name>.html".

    A ValueError is raised if either path does not end in ".csv"
    (case-insensitive).
    """

    # Validate both inputs first so that a rejected run has no side effects
    # (no output folder is created, nothing is read).
    for csv_file in (original_file, new_file):
        if not str(csv_file).lower().endswith(".csv"):
            raise ValueError(
                f"The file path must refer to a CSV file: {csv_file!r}"
            )

    # The file stems double as the dataset labels shown in the report
    original_name = Path(original_file).stem
    new_name = Path(new_file).stem

    # Create the output folder if it does not exist
    output_folder = Path(save_location).expanduser()
    output_folder.mkdir(parents=True, exist_ok=True)

    # Timestamp prefix keeps reports from successive runs from overwriting
    # each other
    dt_string = datetime.now().strftime("%Y%m%d%H%M%S")
    report_path = output_folder / f"{dt_string}-{original_name}.html"

    # Read in the CSV files as dataframes
    original_df = pd.read_csv(original_file)
    new_df = pd.read_csv(new_file)

    # Build the comparison report; each entry is [dataframe, display name]
    report = sv.compare([original_df, original_name], [new_df, new_name])

    # Save the report to an HTML file and open it in the default browser
    report.show_html(str(report_path))
# Script entry point: invoke the click command, which parses the command-line
# arguments itself, when this file is executed directly (e.g. with `uv run`).
if __name__ == "__main__":
    create_summary_report()