Files
VI_Lab_01_EDA/y-prof.py
2026-02-23 08:21:32 +00:00

37 lines
978 B
Python

import pandas as pd
import seaborn as sns
from ydata_profiling import ProfileReport
import pyreadr
# result = pyreadr.read_r("./data/datasaurus_dozen.rda")
# result.keys()
# df = list(result.values())[0]
# df.head()
# profile = ProfileReport(df, title="Datasaurus Dataset Profile", explorative=True)
# profile.to_file("datasaurus_profile_report.html")
# exit()
# Input CSV to profile.
# NOTE(review): the original comment mentioned variants "D/E/F"; presumably
# sibling CSV files exist with a different letter in the name — confirm.
CSV_PATH = "dataset_D_git_classroom_activity_v2.csv"

# Destination of the generated HTML report.
REPORT_PATH = "git_profile_report.html"

# NOTE(review): the title mentions "Indie Games Telemetry" while the CSV file
# name suggests git classroom activity — kept unchanged, confirm the intent.
REPORT_TITLE = "Indie Games Telemetry Dataset Profile"


def main():
    """Read the CSV, print a short preview, and write the HTML profile report.

    The script previously also downloaded seaborn's "penguins" dataset and
    printed *its* shape; that dataset was never profiled, so the load was
    dropped and the shape now refers to the CSV that is actually analysed.
    """
    # dtype=str reads every column as strings instead of letting pandas
    # infer numeric/datetime types.
    df_raw = pd.read_csv(CSV_PATH, dtype=str)

    # Display basic info about the dataset being profiled.
    print("Dataset shape:", df_raw.shape)
    print("\nFirst 5 rows:")
    print(df_raw.head())

    # Generate and save the profile report.
    profile = ProfileReport(df_raw, title=REPORT_TITLE, explorative=True)
    profile.to_file(REPORT_PATH)

    # Report the path that was really written (the old message named a
    # different, non-existent file).
    print(f"\nProfile report saved as '{REPORT_PATH}'")
    print("Open this file in a web browser to view the detailed analysis.")


if __name__ == "__main__":
    main()