{ "cells": [ { "cell_type": "markdown", "id": "64f0fe5d", "metadata": {}, "source": [ "**43679 - Interactive Visualization**<br>\n", "**2025 - 2026**<br>\n", "*2nd semester*\n", "\n", "**Lab 01** - Task 0<br>\n", "Exploring the value of Visualization to go beyond descriptive statistics" ] }, { "cell_type": "code", "execution_count": 1, "id": "d9080704", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "from ydata_profiling import ProfileReport\n", "import pyreadr" ] }, { "cell_type": "code", "execution_count": 2, "id": "64c538cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " event_id user_id repo_id timestamp \\\n", "0 evt_d351e59b15fd user_2432 repo_575 29/03/2025 17:26 \n", "1 evt_435c1b33622f user_2017 repo_1112 01/07/2025 12:20 \n", "2 evt_758099c90286 user_930 repo_103 2025-01-30 02:26:34+00:00 \n", "3 evt_312809052420 user_1892 repo_988 2025-03-21 08:01:25-05:00 \n", "4 evt_0b2d75d29ec3 user_2793 repo_419 2025-02-28 18:22:51-05:00 \n", "\n", " event_type lines_added lines_deleted files_changed dominant_language \\\n", "0 pr_opened 40 2 3 Python \n", "1 Commit 3 24 2 GO \n", "2 pr_merged 13 12 11 Rust \n", "3 pr_opened 28 6 3 C++ \n", "4 Review_comment 79 2 3 C++ \n", "\n", " ci_status ... time_to_ci_minutes build_duration_s tests_run tests_failed \\\n", "0 SUCCESS ... 13.38 493.98 115 8 \n", "1 FAILED ... 16.86 107.57 90 14 \n", "2 failed ... 448.32 193.38 92 6 \n", "3 SUCCESS ... NaN 498.92 177 12 \n", "4 failure ... 
1.14 162.55 113 9 \n", "\n", " is_weekend pr_merge_time_hours label_is_high_quality exam_period \\\n", "0 1 54.6 0 true \n", "1 False NaN 0 No \n", "2 No 68.6 False false \n", "3 0 50.6 No No \n", "4 false NaN False false \n", "\n", " commit_message_length is_bot_user \n", "0 39.0 HUMAN \n", "1 65.0 Bot \n", "2 79.0 Human \n", "3 NaN BOT \n", "4 48.0 human \n", "\n", "[5 rows x 23 columns]\n" ] } ], "source": [ "csv_path = 'dataset_D_git_classroom_activity_v2.csv' # or D/E/F\n", "df = pd.read_csv(csv_path, dtype=str)\n", "\n", "print(df.head())" ] }, { "cell_type": "code", "execution_count": 5, "id": "8b5b7074", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "235ae862343743c189592812139566b2", "version_major": 2, "version_minor": 0 }, "text/plain": [ " | | [ 0%] 00:00 -> (? left)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n" ] } ], "source": [ "import sweetviz as sv\n", "report = sv.analyze(df)\n", "report.show_html(\"sweetviz_report.html\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "f9192f8c", "metadata": {}, "outputs": [], "source": [ "import dtale\n", "d = dtale.show(df)\n", "d.open_browser()\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "6588c7d5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6da981bddb8148c6b90b1e959bf69e81", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Summarize dataset: 0%| | 0/5 [00:00