This commit is contained in:
2026-02-23 08:21:32 +00:00
parent 52e38435fa
commit ed360f9967
34 changed files with 73045 additions and 37 deletions

320
EDA_Lab_Starter.ipynb Normal file
View File

@@ -0,0 +1,320 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Interactive EDA Lab Starter (SweetViz & DTale)\n",
"Use this notebook to explore the datasets and practice fixing the data-quality issues that commonly surface during EDA."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Optional installs (run once; %pip installs into the active kernel's environment):\n",
"# %pip install -q sweetviz dtale pandas numpy\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>session_id</th>\n",
" <th>user_id</th>\n",
" <th>start_time</th>\n",
" <th>end_time</th>\n",
" <th>session_length_s</th>\n",
" <th>region</th>\n",
" <th>platform</th>\n",
" <th>gpu_model</th>\n",
" <th>avg_fps</th>\n",
" <th>ping_ms</th>\n",
" <th>map_name</th>\n",
" <th>crash_flag</th>\n",
" <th>purchase_amount</th>\n",
" <th>party_size</th>\n",
" <th>input_method</th>\n",
" <th>build_version</th>\n",
" <th>is_featured_event</th>\n",
" <th>device_temp_c</th>\n",
" <th>session_type</th>\n",
" <th>is_long_session</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>sess_c2fba8e7f37a</td>\n",
" <td>user_488</td>\n",
" <td>2025-07-18T18:32:00Z</td>\n",
" <td>2025-07-18 20:03:21-05:00</td>\n",
" <td>5481.0</td>\n",
" <td>us-west</td>\n",
" <td>pc</td>\n",
" <td>GTX1080</td>\n",
" <td>83.52</td>\n",
" <td>431.16</td>\n",
" <td>ocean</td>\n",
" <td>Yes</td>\n",
" <td>0,00</td>\n",
" <td>2</td>\n",
" <td>Touch</td>\n",
" <td>NaN</td>\n",
" <td>No</td>\n",
" <td>85.6</td>\n",
" <td>ranked</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>sess_33d286298cf9</td>\n",
" <td>user_1511</td>\n",
" <td>2025-06-13 23:21:08+00:00</td>\n",
" <td>2025-06-13 23:36:30+01:00</td>\n",
" <td>922.0</td>\n",
" <td>Us-east</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>72.75</td>\n",
" <td>29.12</td>\n",
" <td>desert</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Touch</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>62.0</td>\n",
" <td>casual</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>sess_be2bb4d8986a</td>\n",
" <td>user_830</td>\n",
" <td>2025-10-20 02:42:07-05:00</td>\n",
" <td>20/10/2025 02:49</td>\n",
" <td>451.0</td>\n",
" <td>sa-east-1</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>69.2</td>\n",
" <td>40.47</td>\n",
" <td>Forest</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>TOUCH</td>\n",
" <td>1.4</td>\n",
" <td>False</td>\n",
" <td>69.0</td>\n",
" <td>ranked</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>sess_7f425ca9a0e2</td>\n",
" <td>user_1</td>\n",
" <td>08/01/2025 06:35</td>\n",
" <td>2025-08-01T08:32:45Z</td>\n",
" <td>7031.0</td>\n",
" <td>sa-east-1</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>33.29</td>\n",
" <td>92.4</td>\n",
" <td>Desert</td>\n",
" <td>No</td>\n",
" <td>17.55</td>\n",
" <td>1</td>\n",
" <td>Controller</td>\n",
" <td>1.3.2</td>\n",
" <td>0</td>\n",
" <td>48.1</td>\n",
" <td>casual</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>sess_5657e28b22ec</td>\n",
" <td>user_211</td>\n",
" <td>2025-09-08T23:41:44Z</td>\n",
" <td>2025-09-09 00:32:59+01:00</td>\n",
" <td>3075.0</td>\n",
" <td>US-EAST</td>\n",
" <td>switch</td>\n",
" <td>NaN</td>\n",
" <td>69.96</td>\n",
" <td>12.63</td>\n",
" <td>Desert</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>controllr</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>54.7</td>\n",
" <td>casual</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" session_id user_id start_time \\\n",
"0 sess_c2fba8e7f37a user_488 2025-07-18T18:32:00Z \n",
"1 sess_33d286298cf9 user_1511 2025-06-13 23:21:08+00:00 \n",
"2 sess_be2bb4d8986a user_830 2025-10-20 02:42:07-05:00 \n",
"3 sess_7f425ca9a0e2 user_1 08/01/2025 06:35 \n",
"4 sess_5657e28b22ec user_211 2025-09-08T23:41:44Z \n",
"\n",
" end_time session_length_s region platform \\\n",
"0 2025-07-18 20:03:21-05:00 5481.0 us-west pc \n",
"1 2025-06-13 23:36:30+01:00 922.0 Us-east PlayStation \n",
"2 20/10/2025 02:49 451.0 sa-east-1 PlayStation \n",
"3 2025-08-01T08:32:45Z 7031.0 sa-east-1 PlayStation \n",
"4 2025-09-09 00:32:59+01:00 3075.0 US-EAST switch \n",
"\n",
" gpu_model avg_fps ping_ms map_name crash_flag purchase_amount party_size \\\n",
"0 GTX1080 83.52 431.16 ocean Yes 0,00 2 \n",
"1 NaN 72.75 29.12 desert No 0.0 3 \n",
"2 NaN 69.2 40.47 Forest False 0.0 5 \n",
"3 NaN 33.29 92.4 Desert No 17.55 1 \n",
"4 NaN 69.96 12.63 Desert False 0.0 2 \n",
"\n",
" input_method build_version is_featured_event device_temp_c session_type \\\n",
"0 Touch NaN No 85.6 ranked \n",
"1 Touch NaN 0 62.0 casual \n",
"2 TOUCH 1.4 False 69.0 ranked \n",
"3 Controller 1.3.2 0 48.1 casual \n",
"4 controllr NaN 0 54.7 casual \n",
"\n",
" is_long_session \n",
"0 True \n",
"1 0 \n",
"2 False \n",
"3 True \n",
"4 Yes "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"csv_path = 'dataset_A_indie_game_telemetry_v2.csv' # or D/E/F\n",
"df_raw = pd.read_csv(csv_path, dtype=str)\n",
"df_raw.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "84e6ef6f5e434536a0dcf69111e15ef9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" | | [ 0%] 00:00 -> (? left)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n",
"SweetViz report written to sweetviz_report.html\n"
]
}
],
"source": [
"import sweetviz as sv\n",
"report = sv.analyze(df_raw)\n",
"report.show_html('sweetviz_report.html')\n",
"print('SweetViz report written to sweetviz_report.html')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import dtale\n",
"d = dtale.show(df_raw)\n",
"d.open_browser()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## EDA Quests\n",
"- Parse timestamps\n",
"- Fix units (minutes vs seconds)\n",
"- Normalize categories & booleans\n",
"- Convert comma-decimals to floats\n",
"- Identify & treat outliers\n",
"- Re-run SweetViz and compare before/after\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,6 +1,7 @@
# VI_Lab_01_EDA # VI_Lab_01_EDA
## Datasaurus dozen
https://cran.r-project.org/web/packages/datasauRus/index.html

File diff suppressed because one or more lines are too long

View File

@@ -15,20 +15,390 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "d9080704", "id": "d9080704",
"metadata": { "metadata": {},
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [], "outputs": [],
"source": [] "source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"from ydata_profiling import ProfileReport\n",
"import pyreadr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "64c538cf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" event_id user_id repo_id timestamp \\\n",
"0 evt_d351e59b15fd user_2432 repo_575 29/03/2025 17:26 \n",
"1 evt_435c1b33622f user_2017 repo_1112 01/07/2025 12:20 \n",
"2 evt_758099c90286 user_930 repo_103 2025-01-30 02:26:34+00:00 \n",
"3 evt_312809052420 user_1892 repo_988 2025-03-21 08:01:25-05:00 \n",
"4 evt_0b2d75d29ec3 user_2793 repo_419 2025-02-28 18:22:51-05:00 \n",
"\n",
" event_type lines_added lines_deleted files_changed dominant_language \\\n",
"0 pr_opened 40 2 3 Python \n",
"1 Commit 3 24 2 GO \n",
"2 pr_merged 13 12 11 Rust \n",
"3 pr_opened 28 6 3 C++ \n",
"4 Review_comment 79 2 3 C++ \n",
"\n",
" ci_status ... time_to_ci_minutes build_duration_s tests_run tests_failed \\\n",
"0 SUCCESS ... 13.38 493.98 115 8 \n",
"1 FAILED ... 16.86 107.57 90 14 \n",
"2 failed ... 448.32 193.38 92 6 \n",
"3 SUCCESS ... NaN 498.92 177 12 \n",
"4 failure ... 1.14 162.55 113 9 \n",
"\n",
" is_weekend pr_merge_time_hours label_is_high_quality exam_period \\\n",
"0 1 54.6 0 true \n",
"1 False NaN 0 No \n",
"2 No 68.6 False false \n",
"3 0 50.6 No No \n",
"4 false NaN False false \n",
"\n",
" commit_message_length is_bot_user \n",
"0 39.0 HUMAN \n",
"1 65.0 Bot \n",
"2 79.0 Human \n",
"3 NaN BOT \n",
"4 48.0 human \n",
"\n",
"[5 rows x 23 columns]\n"
]
}
],
"source": [
"csv_path = 'dataset_D_git_classroom_activity_v2.csv' # or A/E/F\n",
"df = pd.read_csv(csv_path, dtype=str)\n",
"\n",
"print(df.head())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8b5b7074",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "235ae862343743c189592812139566b2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" | | [ 0%] 00:00 -> (? left)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n"
]
}
],
"source": [
"import sweetviz as sv\n",
"report = sv.analyze(df)\n",
"report.show_html(\"sweetviz_report.html\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f9192f8c",
"metadata": {},
"outputs": [],
"source": [
"import dtale\n",
"d = dtale.show(df)\n",
"d.open_browser()\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6588c7d5",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6da981bddb8148c6b90b1e959bf69e81",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\ydata_profiling\\model\\typeset_relations.py:118: FutureWarning:\n",
"\n",
"In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n",
"\n",
"d:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\ydata_profiling\\model\\typeset_relations.py:118: FutureWarning:\n",
"\n",
"In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n",
"\n",
"d:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\ydata_profiling\\model\\typeset_relations.py:118: FutureWarning:\n",
"\n",
"In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n",
"\n",
"d:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\ydata_profiling\\model\\typeset_relations.py:118: FutureWarning:\n",
"\n",
"In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n",
"\n",
"100%|██████████| 23/23 [00:02<00:00, 10.28it/s]\n",
"2026-02-22 18:18:00,663 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:00,665 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:01,181 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:01,184 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:01,769 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:01,771 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:02,102 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:02,104 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:06,881 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:06,883 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:15,152 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:15,153 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:15,879 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:15,881 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:16,450 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:16,451 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:17,432 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:17,434 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:18,477 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:18,478 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:18,927 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:18,928 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:19,290 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:19,291 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:19,537 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:19,539 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:23,884 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:23,886 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:32,551 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:32,552 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:33,203 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:33,206 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:33,680 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:33,682 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:34,583 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:34,585 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,018 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,020 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,361 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,362 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,606 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,608 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,731 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:35,732 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:40,821 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:40,824 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:49,090 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:49,092 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:49,640 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:49,642 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:50,002 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:50,004 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:50,868 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:50,869 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:51,180 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:51,185 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:55,498 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:55,504 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:59,720 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:18:59,726 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:05,342 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:05,349 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:13,151 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:13,157 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:26,049 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:26,055 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:30,583 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:30,588 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:35,069 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:35,073 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:38,983 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:38,988 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:45,280 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:45,294 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:52,574 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:52,589 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:59,780 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:19:59,793 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:09,561 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:09,574 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:20,546 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:20,559 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:37,612 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:37,625 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:45,120 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:45,134 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:52,334 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:20:52,339 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:00,146 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:00,159 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:07,049 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:07,051 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:07,799 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:07,801 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:08,453 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:08,455 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:08,989 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:08,991 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:13,508 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:13,511 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:21,077 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:21,079 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:22,011 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:22,013 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:22,775 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:22,776 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:23,964 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:23,966 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:24,694 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:24,696 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:25,246 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:25,248 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:25,721 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:25,723 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:26,091 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:26,093 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:30,494 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:30,495 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:42,759 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:42,760 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:43,548 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:43,550 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:44,121 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:44,122 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:45,072 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:45,073 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:45,610 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:45,612 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:46,597 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:46,599 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:47,495 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:47,497 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:48,310 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:48,312 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:52,223 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:52,226 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:55,903 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:55,907 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:57,084 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:57,087 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:58,045 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:58,047 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:59,518 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:21:59,520 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:00,485 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:00,487 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,013 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,015 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,438 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,439 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,743 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:01,745 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:05,940 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:05,942 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:12,901 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:12,903 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:13,646 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:13,648 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:14,203 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:14,205 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:15,198 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n",
"2026-02-22 18:22:15,199 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e92ab4eef2e64b41a613807fdf8c6a7c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "aa07e39a84b748a7b81430aa794762bb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Render HTML: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1c42855b0063421daf892af1da802367",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Export report to file: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"profile = ProfileReport(df, title=\"Indie Games Telemetry Dataset Profile\", explorative=True)\n",
"profile.to_file(\"ydata_profile_report.html\")"
]
} }
], ],
"metadata": { "metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": { "language_info": {
"name": "python" "codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
} }
}, },
"nbformat": 4, "nbformat": 4,

1847
claude/datasaurus_dozen.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,829 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Lab 02 · Task 2 — Guided EDA and Data Cleaning with SweetViz & D-Tale\n",
"\n",
"**Estimated time:** ~50 minutes \n",
"**Dataset:** `dataset_A_indie_game_telemetry_v2.csv`\n",
"\n",
"---\n",
"\n",
"### Objectives\n",
"\n",
"By the end of this task you will be able to:\n",
"- Generate an automated EDA report with **SweetViz** to get a rapid overview of a dataset\n",
"- Use **D-Tale** interactively to identify and fix data quality problems\n",
"- Recognise the most common categories of data issues: inconsistent encoding, mixed types, excessive missingness, and outliers\n",
"- Understand how interactive tools translate cleaning actions into pandas code\n",
"\n",
"---\n",
"\n",
"### Context\n",
"\n",
"You have been handed a telemetry dataset from a small indie game studio. It contains **10,000 session records** with information about players, platforms, performance metrics, and purchases. Before any visualisation or analysis can be built on top of this data, it must be understood and cleaned.\n",
"\n",
"This is real-world data quality: messy, inconsistent, and requiring decisions — not just mechanical fixes.\n",
"\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Part 1 — Setup and First Load"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape: (10000, 20)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>session_id</th>\n",
" <th>user_id</th>\n",
" <th>start_time</th>\n",
" <th>end_time</th>\n",
" <th>session_length_s</th>\n",
" <th>region</th>\n",
" <th>platform</th>\n",
" <th>gpu_model</th>\n",
" <th>avg_fps</th>\n",
" <th>ping_ms</th>\n",
" <th>map_name</th>\n",
" <th>crash_flag</th>\n",
" <th>purchase_amount</th>\n",
" <th>party_size</th>\n",
" <th>input_method</th>\n",
" <th>build_version</th>\n",
" <th>is_featured_event</th>\n",
" <th>device_temp_c</th>\n",
" <th>session_type</th>\n",
" <th>is_long_session</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>sess_c2fba8e7f37a</td>\n",
" <td>user_488</td>\n",
" <td>2025-07-18T18:32:00Z</td>\n",
" <td>2025-07-18 20:03:21-05:00</td>\n",
" <td>5481.0</td>\n",
" <td>us-west</td>\n",
" <td>pc</td>\n",
" <td>GTX1080</td>\n",
" <td>83.52</td>\n",
" <td>431.16</td>\n",
" <td>ocean</td>\n",
" <td>Yes</td>\n",
" <td>0,00</td>\n",
" <td>2</td>\n",
" <td>Touch</td>\n",
" <td>NaN</td>\n",
" <td>No</td>\n",
" <td>85.6</td>\n",
" <td>ranked</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>sess_33d286298cf9</td>\n",
" <td>user_1511</td>\n",
" <td>2025-06-13 23:21:08+00:00</td>\n",
" <td>2025-06-13 23:36:30+01:00</td>\n",
" <td>922.0</td>\n",
" <td>Us-east</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>72.75</td>\n",
" <td>29.12</td>\n",
" <td>desert</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Touch</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>62.0</td>\n",
" <td>casual</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>sess_be2bb4d8986a</td>\n",
" <td>user_830</td>\n",
" <td>2025-10-20 02:42:07-05:00</td>\n",
" <td>20/10/2025 02:49</td>\n",
" <td>451.0</td>\n",
" <td>sa-east-1</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>69.20</td>\n",
" <td>40.47</td>\n",
" <td>Forest</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>TOUCH</td>\n",
" <td>1.4</td>\n",
" <td>False</td>\n",
" <td>69.0</td>\n",
" <td>ranked</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>sess_7f425ca9a0e2</td>\n",
" <td>user_1</td>\n",
" <td>08/01/2025 06:35</td>\n",
" <td>2025-08-01T08:32:45Z</td>\n",
" <td>7031.0</td>\n",
" <td>sa-east-1</td>\n",
" <td>PlayStation</td>\n",
" <td>NaN</td>\n",
" <td>33.29</td>\n",
" <td>92.40</td>\n",
" <td>Desert</td>\n",
" <td>No</td>\n",
" <td>17.55</td>\n",
" <td>1</td>\n",
" <td>Controller</td>\n",
" <td>1.3.2</td>\n",
" <td>0</td>\n",
" <td>48.1</td>\n",
" <td>casual</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>sess_5657e28b22ec</td>\n",
" <td>user_211</td>\n",
" <td>2025-09-08T23:41:44Z</td>\n",
" <td>2025-09-09 00:32:59+01:00</td>\n",
" <td>3075.0</td>\n",
" <td>US-EAST</td>\n",
" <td>switch</td>\n",
" <td>NaN</td>\n",
" <td>69.96</td>\n",
" <td>12.63</td>\n",
" <td>Desert</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>controllr</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>54.7</td>\n",
" <td>casual</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" session_id user_id start_time \\\n",
"0 sess_c2fba8e7f37a user_488 2025-07-18T18:32:00Z \n",
"1 sess_33d286298cf9 user_1511 2025-06-13 23:21:08+00:00 \n",
"2 sess_be2bb4d8986a user_830 2025-10-20 02:42:07-05:00 \n",
"3 sess_7f425ca9a0e2 user_1 08/01/2025 06:35 \n",
"4 sess_5657e28b22ec user_211 2025-09-08T23:41:44Z \n",
"\n",
" end_time session_length_s region platform \\\n",
"0 2025-07-18 20:03:21-05:00 5481.0 us-west pc \n",
"1 2025-06-13 23:36:30+01:00 922.0 Us-east PlayStation \n",
"2 20/10/2025 02:49 451.0 sa-east-1 PlayStation \n",
"3 2025-08-01T08:32:45Z 7031.0 sa-east-1 PlayStation \n",
"4 2025-09-09 00:32:59+01:00 3075.0 US-EAST switch \n",
"\n",
" gpu_model avg_fps ping_ms map_name crash_flag purchase_amount party_size \\\n",
"0 GTX1080 83.52 431.16 ocean Yes 0,00 2 \n",
"1 NaN 72.75 29.12 desert No 0.0 3 \n",
"2 NaN 69.20 40.47 Forest False 0.0 5 \n",
"3 NaN 33.29 92.40 Desert No 17.55 1 \n",
"4 NaN 69.96 12.63 Desert False 0.0 2 \n",
"\n",
" input_method build_version is_featured_event device_temp_c session_type \\\n",
"0 Touch NaN No 85.6 ranked \n",
"1 Touch NaN 0 62.0 casual \n",
"2 TOUCH 1.4 False 69.0 ranked \n",
"3 Controller 1.3.2 0 48.1 casual \n",
"4 controllr NaN 0 54.7 casual \n",
"\n",
" is_long_session \n",
"0 True \n",
"1 0 \n",
"2 False \n",
"3 True \n",
"4 Yes "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import sweetviz as sv\n",
"import dtale\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Load the raw dataset — do NOT clean anything yet.\n",
"# `df_raw` stays untouched for the SweetViz / D-Tale inspection cells below;\n",
"# `df` is the working copy that later cleaning steps modify.\n",
"df_raw = pd.read_csv('dataset_A_indie_game_telemetry_v2.csv')\n",
"df = df_raw.copy()\n",
"\n",
"print(f'Shape: {df.shape}')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Column types (as loaded):\n",
"session_id object\n",
"user_id object\n",
"start_time object\n",
"end_time object\n",
"session_length_s float64\n",
"region object\n",
"platform object\n",
"gpu_model object\n",
"avg_fps float64\n",
"ping_ms float64\n",
"map_name object\n",
"crash_flag object\n",
"purchase_amount object\n",
"party_size int64\n",
"input_method object\n",
"build_version object\n",
"is_featured_event object\n",
"device_temp_c float64\n",
"session_type object\n",
"is_long_session object\n",
"dtype: object\n"
]
}
],
"source": [
"# Quick look at column types as pandas inferred them\n",
"print('Column types (as loaded):')\n",
"print(df.dtypes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **⚠️ Notice:** Several columns that should be boolean (`crash_flag`, `is_featured_event`, `is_long_session`) or numeric (`purchase_amount`) have been inferred as `object`. This is your first signal that something is wrong.\n",
"\n",
"---\n",
"\n",
"## Part 2 — Automated Overview with SweetViz\n",
"\n",
"Before diving into manual inspection, generate a SweetViz report. This gives you a visual overview of every column in one step — distributions, types, missing values, and anomalies.\n",
"\n",
"**Think of SweetViz as your \"triage\" tool.** It shows you *where* to look; D-Tale is where you look *closely*."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bd10cd653e7a47f891552a79e946376c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" | | [ 0%] 00:00 -> (? left)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Report sweetviz_raw_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n",
"Report saved as sweetviz_raw_report.html — open it in your browser.\n"
]
}
],
"source": [
"# Generate the SweetViz report\n",
"# This may take 30–60 seconds\n",
"report = sv.analyze(df_raw)\n",
"report.show_html('sweetviz_raw_report.html')\n",
"\n",
"print('Report saved as sweetviz_raw_report.html — open it in your browser.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 📋 SweetViz Checklist — What to look for\n",
"\n",
"Open `sweetviz_raw_report.html` and answer the following questions. Write your findings below before moving on.\n",
"\n",
"| Question | Your finding |\n",
"|---|---|\n",
"| Which columns have missing values? Which has the most? | *...* |\n",
"| Which columns are listed as TEXT but should be numeric or boolean? | *...* |\n",
"| Are there any numeric columns with suspicious ranges (very high max or very low min)? | *...* |\n",
"| How many unique values does `region` have? Does that seem right? | *...* |\n",
"| What is unusual about `purchase_amount`? | *...* |\n",
"\n",
"*(Double-click to fill in your answers)*\n",
"\n",
"---\n",
"\n",
"## Part 3 — Deep Inspection and Cleaning with D-Tale\n",
"\n",
"D-Tale opens the dataset in an interactive grid. You can sort, filter, inspect, and clean without writing a single line of pandas — but D-Tale records every action as code you can export later.\n",
"\n",
"**Launch D-Tale now:**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-02-22 20:12:55,619 - INFO - D-Tale started at: http://127.0.0.1:40000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Open D-Tale at: http://127.0.0.1:40000\n"
]
}
],
"source": [
"# Launch D-Tale with the raw dataset\n",
"# A link will appear — click it to open D-Tale in a new browser tab\n",
"d = dtale.show(df_raw, host='127.0.0.1', subprocess=False, open_browser=True)\n",
"print(\"Open D-Tale at:\", d._url) # URL of this D-Tale instance\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c2e5293",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "bad operand type for abs(): 'str'",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[16]\u001b[39m\u001b[32m, line 21\u001b[39m\n\u001b[32m 18\u001b[39m \tstr_data = pd.to_numeric(s, errors=\u001b[33m'\u001b[39m\u001b[33mcoerce\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 19\u001b[39m pd.Series(str_data, name=\u001b[33m'\u001b[39m\u001b[33mpurchase_amount\u001b[39m\u001b[33m'\u001b[39m, index=s.index)\n\u001b[32m---> \u001b[39m\u001b[32m21\u001b[39m df[\u001b[33m'\u001b[39m\u001b[33mpurchase_amount\u001b[39m\u001b[33m'\u001b[39m] = \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mpurchase_amount\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mabs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32md:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py:1722\u001b[39m, in \u001b[36mNDFrame.abs\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1654\u001b[39m \u001b[38;5;129m@final\u001b[39m\n\u001b[32m 1655\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mabs\u001b[39m(\u001b[38;5;28mself\u001b[39m) -> Self:\n\u001b[32m 1656\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1657\u001b[39m \u001b[33;03m Return a Series/DataFrame with absolute numeric value of each element.\u001b[39;00m\n\u001b[32m 1658\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 1720\u001b[39m \u001b[33;03m 3 7 40 -50\u001b[39;00m\n\u001b[32m 1721\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1722\u001b[39m res_mgr = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_mgr\u001b[49m\u001b[43m.\u001b[49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mabs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1723\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._constructor_from_mgr(res_mgr, axes=res_mgr.axes).__finalize__(\n\u001b[32m 1724\u001b[39m \u001b[38;5;28mself\u001b[39m, name=\u001b[33m\"\u001b[39m\u001b[33mabs\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1725\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32md:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:361\u001b[39m, in \u001b[36mBaseBlockManager.apply\u001b[39m\u001b[34m(self, f, align_keys, **kwargs)\u001b[39m\n\u001b[32m 358\u001b[39m kwargs[k] = obj[b.mgr_locs.indexer]\n\u001b[32m 360\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(f):\n\u001b[32m--> \u001b[39m\u001b[32m361\u001b[39m applied = \u001b[43mb\u001b[49m\u001b[43m.\u001b[49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 362\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 363\u001b[39m applied = \u001b[38;5;28mgetattr\u001b[39m(b, f)(**kwargs)\n",
"\u001b[36mFile \u001b[39m\u001b[32md:\\Projects\\43679_InteractiveVis\\VI_Lab_01_EDA\\.venv\\Lib\\site-packages\\pandas\\core\\internals\\blocks.py:395\u001b[39m, in \u001b[36mBlock.apply\u001b[39m\u001b[34m(self, func, **kwargs)\u001b[39m\n\u001b[32m 389\u001b[39m \u001b[38;5;129m@final\u001b[39m\n\u001b[32m 390\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m, func, **kwargs) -> \u001b[38;5;28mlist\u001b[39m[Block]:\n\u001b[32m 391\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 392\u001b[39m \u001b[33;03m apply the function to my values; return a block if we are not\u001b[39;00m\n\u001b[32m 393\u001b[39m \u001b[33;03m one\u001b[39;00m\n\u001b[32m 394\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m395\u001b[39m result = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 397\u001b[39m result = maybe_coerce_values(result)\n\u001b[32m 398\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._split_op_result(result)\n",
"\u001b[31mTypeError\u001b[39m: bad operand type for abs(): 'str'"
]
}
],
"source": [
"# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'\n",
"\n",
"import pandas as pd\n",
"\n",
"if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):\n",
"\tdf = df.to_frame(index=False)\n",
"\n",
"# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required\n",
"df = df.reset_index().drop('index', axis=1, errors='ignore')\n",
"df.columns = [str(c) for c in df.columns] # update columns to strings in case they are numbers\n",
"\n",
"df['purchase_amount'] = df['purchase_amount'].str.replace(',', '.', case=False, regex='False')\n",
"df['purchase_amount'] = s = df['purchase_amount'] \n",
"\n",
"if s.str.startswith('0x').any():\n",
"\tstr_data = s.apply(float.fromhex)\n",
"else:\n",
"\tstr_data = pd.to_numeric(s, errors='coerce')\n",
"\t\n",
"pd.Series(str_data, name='purchase_amount', index=s.index)\n",
"\n",
"df['purchase_amount'] = df['purchase_amount'].abs()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "8180fa05",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-02-22 20:18:35,563 - INFO - D-Tale started at: http://127.0.0.1:40000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Open D-Tale at: http://127.0.0.1:40000\n"
]
}
],
"source": [
"# Launch D-Tale with the raw dataset\n",
"# A link will appear — click it to open D-Tale in a new browser ta\n",
"d = dtale.show(df, host='127.0.0.1', subprocess=False, open_browser=True)\n",
"print(\"Open D-Tale at:\", d._url) # lists all running instances\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "745a5655",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" TCP 169.254.62.24:40000 0.0.0.0:0 LISTENING 11972\n",
"\n"
]
}
],
"source": [
"# Check if something else is already on port 40000\n",
"import subprocess\n",
"result = subprocess.run('netstat -ano | findstr :40000', shell=True, capture_output=True, text=True)\n",
"print(result.stdout or \"Nothing on port 40000\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"\n",
"### 🔍 Issue 1 — Missing Values\n",
"\n",
"In D-Tale, go to **\"Describe\"** (top menu → Describe) to see the missing value counts per column.\n",
"\n",
"You will find:\n",
"\n",
"| Column | Missing | Note |\n",
"|---|---|---|\n",
"| `gpu_model` | ~67% | Most players are on console — GPU does not apply |\n",
"| `build_version` | ~17% | Not recorded in older sessions |\n",
"| `device_temp_c` | ~5% | Sensor not available on some devices |\n",
"| `session_length_s` | ~1% | Session ended abnormally (crash?) |\n",
"| `ping_ms`, `purchase_amount`, `end_time` | <2% | Sporadic gaps |\n",
"\n",
"**Cleaning decisions to make in D-Tale:**\n",
"\n",
"1. **`gpu_model`** — This column is missing for 67% of rows. Rather than imputing, consider: is this column useful for a console/mobile player? Go to **Column Actions → Delete Column** and remove it. Alternatively, you can keep it and decide during analysis.\n",
"\n",
"2. **`build_version`** — Missings are structurally valid (older sessions). Keep the column; do not impute.\n",
"\n",
"3. **Remaining columns** — Leave missing values in place for now. We will handle them during analysis when context is clearer.\n",
"\n",
"> 📝 **Record your decisions:** Which columns did you keep? Which did you drop? Why?\n",
"\n",
"*(Double-click to write your decisions here)*\n",
"\n",
"---\n",
"\n",
"### 🔍 Issue 2 — Boolean Columns with Mixed Encodings\n",
"\n",
"Three columns represent true/false flags but were stored with at least **8 different representations**:\n",
"\n",
"- `crash_flag` → `Yes`, `No`, `True`, `False`, `true`, `false`, `1`, `0`\n",
"- `is_featured_event` → same 8 representations \n",
"- `is_long_session` → same 8 representations\n",
"\n",
"**In D-Tale, clean each column:**\n",
"\n",
"1. Click the column header → **Column Actions → Type Conversion**\n",
"2. Select **String to Bool** (D-Tale will map Yes/True/1 → True and No/False/0 → False)\n",
"3. Preview the result before applying\n",
"4. Repeat for all three columns\n",
"\n",
"> 💡 **Alternative via Find & Replace:** If Type Conversion does not cover all variants, use **Column Actions → Replace** to manually map unusual values (e.g., `Yes` → `True`) before converting.\n",
"\n",
"After cleaning, verify with Describe: each column should show only `True` and `False`.\n",
"\n",
"---\n",
"\n",
"### 🔍 Issue 3 — Categorical Columns: Case and Whitespace Chaos\n",
"\n",
"Four categorical columns have serious inconsistency:\n",
"\n",
"- `region` — 32 variants of 5 values (e.g., `us-west`, `US-WEST`, `Us-west`, `' us-west '`)\n",
"- `map_name` — 36 variants of 6 values\n",
"- `platform` — 32 variants of 6 values\n",
"- `input_method` — 30 variants, including a typo: `controllr` instead of `controller`\n",
"\n",
"**Clean each column in D-Tale:**\n",
"\n",
"1. Click column header → **Column Actions → Type Conversion → String Cleaning**\n",
"2. Apply **Strip whitespace** and **Lowercase** (or **Uppercase** — be consistent)\n",
"3. For `input_method`, also apply a **Replace** to fix `controllr` → `controller` and `kb/m` → `kbm` (pick one variant and standardise)\n",
"\n",
"After cleaning, each column should have the expected number of unique values:\n",
"\n",
"| Column | Before | After |\n",
"|---|---|---|\n",
"| `region` | 32 | 5 |\n",
"| `map_name` | 36 | 6 |\n",
"| `platform` | 32 | 6 |\n",
"| `input_method` | 30 | 3 |\n",
"\n",
"> Use **Describe → value_counts** to verify before and after each fix.\n",
"\n",
"---\n",
"\n",
"### 🔍 Issue 4 — `purchase_amount`: Comma as Decimal Separator\n",
"\n",
"Some rows contain values like `\"0,00\"` and `\"1,80\"` where a comma was used instead of a decimal point. This prevents pandas from reading the column as numeric.\n",
"\n",
"**In D-Tale:**\n",
"\n",
"1. Filter the column to show only rows where the value contains a comma: **Column Actions → Filter → contains `,`**\n",
"2. Apply a **Replace**: replace `,` with `.` in the column\n",
"3. Then convert the column type: **Column Actions → Type Conversion → Float**\n",
"\n",
"> After conversion, verify the column dtype and check the range (min/max) with Describe.\n",
"\n",
"---\n",
"\n",
"### 🔍 Issue 5 — Outliers in Numeric Columns\n",
"\n",
"The SweetViz report and D-Tale Describe should have flagged suspicious ranges. Check these now:\n",
"\n",
"| Column | Suspicious value | Likely explanation |\n",
"|---|---|---|\n",
"| `avg_fps` | max = 10,000 | Sensor error or logging bug — physically impossible |\n",
"| `ping_ms` | max = 627 ms | High but plausible for satellite connections |\n",
"| `device_temp_c` | max = 100°C | Right at thermal throttling limit — possible but worth flagging |\n",
"\n",
"**In D-Tale, investigate `avg_fps`:**\n",
"\n",
"1. Use **Charts** (top menu) to plot a histogram of `avg_fps` — does it show an extreme outlier spike?\n",
"2. Use **Filter** to see how many rows have `avg_fps > 300` (a hard upper bound for realistic gameplay)\n",
"3. **Decide:** Should these rows be dropped, or should the value be set to `NaN` to mark it as invalid?\n",
"4. Apply your decision via **Column Actions → Replace** or a row-level **Filter + Delete**\n",
"\n",
"> 📝 **Record your decision and reasoning:** What threshold did you use? How many rows were affected?\n",
"\n",
"*(Double-click to write your answer here)*\n",
"\n",
"---\n",
"\n",
"### 🔍 Issue 6 — Mixed Datetime Formats\n",
"\n",
"The `start_time` and `end_time` columns contain timestamps in multiple formats:\n",
"\n",
"- ISO 8601 with timezone: `2025-07-18T18:32:00Z`\n",
"- ISO with offset: `2025-07-18 20:03:21-05:00` \n",
"- European: `20/10/2025 02:49`\n",
"- US: `08/01/2025 06:35`\n",
"\n",
"This is one of the harder issues to fix entirely within D-Tale's UI. For now:\n",
"\n",
"1. In D-Tale, go to **Column Actions → Type Conversion** on `start_time` and try **String to Date** with `infer_datetime_format=True`\n",
"2. Check how many values fail to parse (shown as NaT after conversion)\n",
"3. Make note of any unresolved formats — these will need to be handled in pandas with `pd.to_datetime(..., errors='coerce')` and may require a more careful cleaning pass\n",
"\n",
"> ⚠️ **Key insight:** Not all cleaning can be done point-and-click. Some issues require programmatic resolution. This is where the code D-Tale generates becomes valuable.\n",
"\n",
"---\n",
"\n",
"## Part 4 — Export the Cleaning Code from D-Tale\n",
"\n",
"Every cleaning action you performed in D-Tale was recorded as pandas code. Let's export and inspect it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve the cleaned dataframe from D-Tale\n",
"# (This reflects all changes made in the D-Tale UI)\n",
"df_clean = d.data.copy()\n",
"\n",
"print(f'Cleaned shape: {df_clean.shape}')\n",
"print('\\nColumn types after cleaning:')\n",
"print(df_clean.dtypes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# D-Tale also lets you export the complete cleaning pipeline as Python code.\n",
"# In the D-Tale UI: click the code icon (</>) in the top-right corner → \"Export Code\"\n",
"# Paste the exported code below:\n",
"\n",
"# --- Paste D-Tale exported code here ---\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4.1 — Manual refinement in pandas\n",
"\n",
"D-Tale generates the skeleton; pandas lets you add precision. Here is an example of cleaning the `start_time` column more robustly — something D-Tale's UI cannot fully handle."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Example: robust datetime parsing for mixed-format timestamps\n",
"# pd.to_datetime with utc=True normalises all timezone representations\n",
"df_clean['start_time'] = pd.to_datetime(df_clean['start_time'], utc=True, errors='coerce')\n",
"df_clean['end_time'] = pd.to_datetime(df_clean['end_time'], utc=True, errors='coerce')\n",
"\n",
"# Check how many rows could not be parsed\n",
"print('Unparsed start_time rows:', df_clean['start_time'].isna().sum())\n",
"print('Unparsed end_time rows: ', df_clean['end_time'].isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Example: cap avg_fps outliers (adjust the threshold based on your decision above)\n",
"# Replace values > 300 with NaN to mark them as invalid rather than deleting rows\n",
"fps_threshold = 300\n",
"n_outliers = (df_clean['avg_fps'] > fps_threshold).sum()\n",
"df_clean.loc[df_clean['avg_fps'] > fps_threshold, 'avg_fps'] = float('nan')\n",
"\n",
"print(f'Rows with avg_fps > {fps_threshold} set to NaN: {n_outliers}')\n",
"print(f'avg_fps range after: {df_clean[\"avg_fps\"].min():.1f} {df_clean[\"avg_fps\"].max():.1f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"\n",
"## Part 5 — Validation: Before vs After\n",
"\n",
"The real test of cleaning work is a comparison report. SweetViz can compare two dataframes side by side."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate a comparison report: raw vs cleaned\n",
"# This may take 6090 seconds\n",
"compare_report = sv.compare([df_raw, 'Raw'], [df_clean, 'Cleaned'])\n",
"compare_report.show_html('sweetviz_comparison_report.html', open_browser=False)\n",
"\n",
"print('Comparison report saved — open sweetviz_comparison_report.html in your browser.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Open the comparison report and verify:\n",
"\n",
"- ✅ Boolean columns now show only `True` / `False`\n",
"- ✅ Categorical columns have the expected number of unique values\n",
"- ✅ `purchase_amount` is now numeric\n",
"- ✅ `avg_fps` no longer has a 10,000 outlier\n",
"- ✅ Missing value counts have changed as expected\n",
"\n",
"---\n",
"\n",
"## Part 6 — Save the Cleaned Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_clean.to_csv('dataset_A_indie_game_telemetry_clean.csv', index=False)\n",
"print('Cleaned dataset saved.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"\n",
"## 🔑 Key Takeaways\n",
"\n",
"- **SweetViz** gives you a rapid automated overview — use it at the start and for before/after comparison. It does not clean; it informs.\n",
"- **D-Tale** lets you explore interactively, spot patterns, and clean through a UI. Every action is tracked as pandas code, so you are never locked into the GUI.\n",
"- **Pandas** remains essential for edge cases: complex datetime parsing, conditional logic, and anything requiring programmatic iteration.\n",
"- The three tools form a pipeline: **SweetViz → triage → D-Tale → interactive cleaning → pandas → refinement**.\n",
"\n",
"**Common issue categories you have now seen:**\n",
"\n",
"| Category | Example in this dataset |\n",
"|---|---|\n",
"| Boolean encoding inconsistency | 8 representations of True/False |\n",
"| Categorical case/whitespace chaos | 32 variants of 5 region names |\n",
"| Typos in categories | `controllr` vs `controller` |\n",
"| Wrong decimal separator | `1,80` instead of `1.80` |\n",
"| Structural missingness | `gpu_model` absent for console players |\n",
"| Sensor/logging outliers | `avg_fps = 10,000` |\n",
"| Mixed datetime formats | ISO 8601 mixed with European dates |\n",
"\n",
"→ In **Task 3**, you will apply these same skills independently to a new dataset — with less guidance."
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

BIN
data/box_plots.rda Normal file

Binary file not shown.

BIN
data/box_plots_long.rda Normal file

Binary file not shown.

BIN
data/datasaurus_dozen.rda Normal file

Binary file not shown.

Binary file not shown.

BIN
data/simpsons_paradox.rda Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

569
datasaurus_task0.csv Normal file
View File

@@ -0,0 +1,569 @@
dataset,x,y
dino,61.05620938387066,50.000786041836115
dino,57.91495193642296,59.204465996007286
dino,61.47023196059987,43.113610600617946
dino,57.80035367010236,47.24321395851151
dino,53.58712459282577,50.05299250969186
dino,54.576174284643514,55.27136753481488
dino,57.04415090058797,48.60837508246414
dino,55.7754529309817,49.668371636871335
dino,59.97631629263042,46.97420868117099
dino,55.252270806603605,43.729521303491374
dino,43.78804073666369,51.2680929772018
dino,57.457744795438025,44.28917489796779
dino,63.07901849595043,40.728171627006176
dino,54.18303406920578,47.06408074987083
dino,60.13111685743383,55.346793849501424
dino,54.61978970278766,49.89081259801087
dino,50.448857009479546,38.09601765888036
dino,52.60835140269539,48.7817448455199
dino,58.921162722910886,54.01189924392206
dino,52.45069273036819,46.48848624712332
dino,49.80578813973163,40.89991031410513
dino,47.17491923749995,57.75387697615895
dino,51.96139127299339,45.80962849194407
dino,48.98881855980029,51.88745177915955
dino,47.5444086097682,46.93629859893016
dino,50.4181337552253,49.93451248929631
dino,51.95677944972451,42.09683907938794
dino,53.88727108664538,50.14165935265209
dino,54.26606888953267,49.512359488698905
dino,51.46271162527614,46.18629417006431
dino,51.3101582088962,46.2022341922973
dino,50.74741487182219,39.36858698834162
dino,54.70970456901501,45.99109531895869
dino,47.47920661213582,50.31391127762887
dino,50.37080654246703,48.259726978980694
dino,56.91636224871015,48.64491455378705
dino,58.5576027381732,41.825870898231734
dino,55.60936656471019,44.575949545298435
dino,50.51681140327247,45.105751676177924
dino,52.75378987149051,48.28082671114873
dino,49.33940063686657,52.50413243477094
dino,55.86264975892184,40.31878156861388
dino,59.9530087751824,57.479445880152916
dino,58.715118284638606,47.10037582093825
dino,49.71698951395783,53.272258634655685
dino,52.38729221210728,54.11222535191214
dino,54.83309991230744,52.88319518241856
dino,55.425465588697605,51.532865840959744
dino,54.042000082883284,56.92935246952918
dino,54.50764837081448,50.00994681722351
dino,61.53260278822502,41.26120469428777
dino,48.91806000605707,52.846983540790056
dino,49.30750637954336,57.71810592824646
dino,52.34552407696101,44.26272594279621
dino,61.69176810592154,55.40257395717212
dino,61.47023584170628,52.53022329137693
dino,50.55509725978119,57.55032476549517
dino,52.92798651619448,52.012281978981974
dino,57.789007871094995,47.22494953454583
dino,56.45631748138432,52.61103335783263
dino,55.505702124622516,42.50299604707903
dino,55.19295269682422,54.63192948343515
dino,51.221728561074535,47.25182729836165
dino,52.25938579311345,57.24631864239671
dino,56.68917902804974,50.03730918120555
dino,50.920335702218736,50.69624595645909
dino,51.3026693573705,48.15915279137175
dino,51.456615686484476,51.3821664747325
dino,56.30636326645976,46.958506222110024
dino,55.58402685064658,42.53469245634747
dino,48.03496962917758,50.19695850632269
dino,54.66669398149011,51.17515718446053
dino,63.532579099455766,52.72239743495207
dino,50.34871109822337,53.585081440479264
dino,48.736370357953916,45.692076975926454
dino,53.72703357870147,56.56671360824683
dino,51.02098071180624,43.867807306704925
dino,53.60618990229827,44.682608568189465
dino,58.50654368842603,42.60034245818288
dino,49.41012539035559,45.81089977627783
dino,52.00787019723078,57.64766026908493
dino,57.79768322770305,48.437756206925954
dino,49.09825792467933,52.221814882007735
dino,49.99913861044174,40.27614451611194
dino,58.752119169409205,49.58471305962425
dino,57.683435295123274,49.5936382647151
dino,57.42732244761076,44.74487203349926
dino,49.86302863286214,51.40797259140813
dino,50.786361343304634,44.552251111249
dino,52.17786998593063,48.08739579512528
dino,52.58402435498606,41.125243532909906
dino,51.42552638866844,36.88298423887787
dino,56.500925804108746,39.98971172196626
dino,49.5824666422862,48.260825396304874
dino,51.041748014434745,55.71507297703368
dino,48.828572361106204,49.33525434674591
dino,53.842868727090014,42.15953251129401
dino,56.09310664212701,47.14226834388876
dino,57.08716220485467,52.117520769818654
dino,62.65294379712276,54.68263974718196
dino,52.523272648230225,46.803104112120366
dino,58.39863838354845,51.27631865361299
dino,56.56052610439037,39.91521977844583
dino,53.902695502404256,44.309845453971555
dino,55.11969839617295,47.50924805178521
dino,57.640715632370366,49.58609107595651
dino,57.1453118484359,45.667904516320284
dino,50.222214976326995,45.94975153398726
dino,53.93191834455424,49.89575867777541
dino,63.037235802763405,47.78871424169679
dino,50.17621999802889,46.27009112150307
dino,52.14561610141563,50.40740736886731
dino,47.8368119422215,48.31630997100166
dino,54.6260261518615,49.16090518100138
dino,51.61073572413855,46.810391351319964
dino,48.303756364069876,45.53340058318903
dino,51.82855409593313,50.080250231307126
dino,49.37527027271235,51.90599050854997
dino,59.97793817796548,37.650074874932336
dino,55.70503492311241,51.38454017515123
dino,51.450251897791084,46.01364092835601
dino,53.468477689652175,46.51104560299136
dino,52.76394812381151,39.61998096835012
dino,58.60932625913248,53.39809296018411
dino,50.74654296318319,40.66787836098743
dino,56.08425950581103,45.12106015093467
dino,54.56781265328312,46.40335791427452
dino,56.76615500428075,51.47374571828003
dino,51.09761048614566,41.083180223024726
dino,47.66824641065967,51.051896895536025
dino,49.24456296886388,45.46591822850656
dino,51.61474384619797,47.73716351865227
dino,46.25488077661397,48.94389298396914
dino,56.095564095336826,48.442110435223306
dino,52.75645531320611,48.48700083134392
dino,55.59618538256052,34.13703621786675
dino,61.823649233002776,49.95046661343963
dino,51.39036567045192,46.045233124061994
dino,55.97496710939675,47.41948030482817
dino,45.87726212887402,58.3224643067966
dino,53.55783737107011,53.100863558579
dino,51.231800608862436,55.68188527122899
star,55.145374755569115,51.044219172377254
star,49.81898653541218,54.0557264484135
star,56.75927265813915,54.50923114782499
star,51.48764976143369,45.594864407696065
star,63.21566679073577,42.69992088639226
star,53.45620119728672,53.684456813013476
star,54.39089987085942,50.91476839876647
star,52.4022038829485,49.850279439237596
star,48.77389259305873,56.29065339809094
star,53.52734381948572,44.59910898001575
star,56.665532328127654,45.69640106305723
star,48.662966114388986,41.266412471012224
star,56.77509261076053,47.20213280926867
star,53.46519376132624,53.388719029881315
star,49.492696764973026,44.34661123567587
star,52.4604807632749,48.47175794658537
star,53.83131419483768,46.56556403805046
star,53.75349439161741,47.46347361854413
star,51.121582445792825,43.93503505722961
star,55.09806543089576,43.54542458502236
star,49.37057896323658,46.438538744371535
star,53.36933193534474,59.283617486491046
star,51.18119889657507,52.71630362484748
star,56.988753336818526,42.05527522398132
star,57.0930119096104,42.080596799033415
star,43.36331104801304,51.031597621796905
star,46.976437666249126,50.25467230902957
star,51.26395640905113,56.29775398094936
star,58.274037597264034,45.733070980743065
star,51.24864955588527,41.9296129845294
star,52.23630947082963,46.59822252407746
star,52.54122582243326,48.78351927636182
star,56.314085990915515,49.74827228496587
star,50.943424304374226,40.811042630992105
star,59.458127392409885,44.55275407725031
star,51.39082560025992,45.394053438494446
star,46.627721799373404,45.61012997979757
star,52.08137674396821,51.10179149171756
star,56.79382859642934,48.018854445431344
star,57.72739349645722,49.69982491900631
star,53.93727155358978,48.804640841491114
star,53.237386025674404,46.02575242983275
star,52.92906585242413,42.35994334264996
star,55.12176682126518,43.034381945352095
star,57.36652505629455,46.752707099195256
star,54.19797992660036,50.46918388140478
star,56.573257860251715,40.146882956832734
star,53.172385295344114,52.40089456040391
star,47.207576722270986,49.936402376975316
star,44.97774308239124,42.88746578182198
star,54.15452220736075,39.71642448839023
star,50.0579570492634,40.64082496268207
star,60.59253972883024,48.82113877743667
star,56.269161111410675,46.88662449742423
star,52.586273004971204,39.917629056744836
star,52.83265054900855,44.19253894094189
star,57.431695697169346,53.705509333287864
star,59.86631486229671,52.26275969730616
star,51.605384252308056,42.42051507019803
star,57.066652726580344,49.78146408736144
star,46.925846197291875,49.77740896371885
star,57.25807928995147,48.29462794590815
star,53.25978531596263,43.96175756191822
star,48.21386120174645,52.00148974670014
star,52.763542220913166,46.832666692281535
star,60.93088474767653,51.42250553429595
star,55.483300005124406,48.710309025936176
star,60.07997944306309,56.59794653708097
star,57.718020445918114,50.91112295698962
star,45.62158771517542,48.61860957116753
star,53.47957218322519,48.469766146927846
star,57.77218434929007,34.30161416405222
star,51.72275178611926,49.34952177470381
star,52.13261781578895,40.9154694343687
star,57.47585394758718,49.38435952923064
star,50.11558171822206,49.57408602257912
star,57.28634284819918,48.026463231496805
star,57.202259213723984,48.391300875808305
star,52.419084069382585,42.20289741800043
star,53.65627693211355,48.971464690228856
star,57.503331046349324,47.424462657563865
star,55.82966242488396,43.17693993133136
star,50.8694833766899,47.44805350486556
star,49.78148614405995,52.1012391866234
star,55.85252131727443,49.395478821962264
star,55.355616500863775,58.10521780742398
star,52.12454324813282,36.99279357249721
star,54.79720078758586,47.74698229519167
star,51.92992382995838,43.10585070320615
star,52.24324191279141,48.90669214608911
star,51.988733197429845,60.06226839771874
star,50.157982473467406,44.03441318646164
star,44.845519839941886,49.257422075107684
star,45.93437348880096,45.302726833127494
star,52.89731786175777,44.45136017076556
star,60.955490709818044,52.97197195657749
star,59.2765475052063,43.58790590725041
star,58.51437625805828,50.48000473171981
star,57.08562379470738,53.147194143913836
star,50.36494701616379,45.878411895110496
star,57.450384045313804,34.72190453512533
star,60.05331233029282,50.76566032103792
star,53.81718415735906,49.102538278785865
star,49.880258866764095,46.25028317705448
star,58.40113735288149,54.490109861631105
star,64.78489621025432,47.630376668597926
star,51.36578813277998,45.42883017030005
star,49.92783249885054,47.610726220295746
star,55.53092972004907,47.82878859734023
star,58.3853873826632,46.828920993277315
star,52.610197390005744,45.093657615698376
star,47.46946189506202,40.16116137845773
star,49.28336827744925,54.5071403583238
star,57.58104109155972,54.87482033196495
star,48.67115338162199,38.1568765510699
star,51.359774719463665,48.87909476648014
star,55.99476109963931,53.239860779840264
star,55.137118683228856,56.71334390327816
star,53.10957727620672,43.43460390979102
star,47.27512713802226,43.555143209522754
star,54.96847184394049,43.55639871323184
star,57.74696985414103,55.06163853018722
star,44.52165237909359,52.320261502488236
star,45.04158376535305,50.00749527545144
star,58.89948225677464,48.32428053171788
star,48.88124330718304,45.07284397861114
star,52.9534182171564,47.088776081050284
star,53.18841263693333,47.45058610345343
star,54.853920195640676,41.957131731333895
star,53.03192068051912,55.59130585177853
star,52.46141830742993,45.78081953422401
star,58.31278921485695,35.20407666827952
star,58.72551440515314,44.840481209974165
star,54.65571428981035,48.48160677960598
star,57.769872476881574,46.66202626882326
star,51.2878968737422,54.48922895325549
star,44.54330473143528,48.101670908526216
star,48.608298309483516,44.192133058717204
star,62.04502672585255,47.77702286772072
star,54.78027878860552,39.09218572147204
star,51.08382136482122,48.98278700364393
star,55.41903077245287,51.08443277196639
circle,54.0345115956703,50.6350210422733
circle,55.815127650542735,38.85129794497735
circle,54.1480228876406,51.83951203866352
circle,56.35951928293808,46.18070595014605
circle,50.77749396984253,42.40844037839184
circle,53.47578395383435,53.66539939779861
circle,46.19278359407359,44.70054135135251
circle,49.4407901782929,51.9247876062025
circle,51.782761493714794,45.64681170922604
circle,53.132201720253406,50.226966254473986
circle,52.430444007401455,32.769284726000365
circle,56.17324756555008,50.19521478836021
circle,53.12183588667515,42.579816896640324
circle,55.407120442725436,49.89617766767793
circle,52.119868469196504,46.916342647122306
circle,50.279373989902716,47.10705453956336
circle,47.798282619666075,50.08659410515918
circle,50.222526036702824,49.19051573916156
circle,48.3761483349284,45.0497117706523
circle,53.558042379736285,39.696500940653685
circle,54.46059149256037,46.10426218560038
circle,47.030575208763075,41.48378622943842
circle,56.420480336328666,52.47777992775662
circle,53.47236544088013,50.02380906020249
circle,54.895374253251646,49.64811491063869
circle,59.14393602832117,40.46500800892864
circle,56.70584292944649,46.0899552221109
circle,53.10296426299344,46.48875134772465
circle,52.499411533354866,41.869019041084904
circle,54.7333567970304,56.354715163944284
circle,53.775467918204974,47.99307478632452
circle,51.25080385137335,47.41262726790944
circle,55.86466570413612,46.14878779647829
circle,52.184783835792,50.01632270081623
circle,50.32798092072382,49.262483135384365
circle,57.281287189045685,54.799742708397424
circle,53.63847197089221,54.837986199033566
circle,58.13763954592607,43.01893679814467
circle,49.1282459536274,46.4751818107279
circle,58.11574197037942,47.63856496219998
circle,51.59736976936885,55.7612159002428
circle,55.147617952013384,36.39702862104629
circle,55.26864250517075,50.60020307285434
circle,54.90243461788442,50.248560501159965
circle,53.73089756430807,41.408020651776326
circle,52.51718398711819,43.27192102221854
circle,50.26903635682249,41.684658254488625
circle,55.80995637055859,48.48948072706314
circle,52.20733854927717,44.753310361348056
circle,53.90630757991419,53.395973640562445
circle,45.98313713800434,49.88438260425446
circle,51.81715210392887,38.57707077510276
circle,46.21718766734416,43.43608252932354
circle,54.87803822317218,49.96531466990046
circle,50.244073709288855,53.08510495706622
circle,59.69193398606443,49.980432924782804
circle,51.634389328767554,53.62209592255184
circle,57.021582782653354,52.33703705677459
circle,51.374145300113874,33.827227473626486
circle,62.4671640859347,39.94560798275033
circle,53.85692771255911,59.90372675609875
circle,55.322307025097494,52.74623236779118
circle,47.99041372247315,39.11166522633147
circle,51.86918883208178,53.4537486721725
circle,52.61500220941076,44.026818394642504
circle,54.79186915977987,53.40967609238633
circle,48.22023920370651,41.94728502938324
circle,50.84532298196253,53.47319187356046
circle,54.93928610379493,58.66076705285222
circle,57.74578290332446,47.82452411565165
circle,59.060311352355065,49.05748506365939
circle,51.18031458997022,51.39987422122551
circle,51.214693384555666,46.54801449598067
circle,59.311130783831935,47.493592568913
circle,50.787434450633484,45.67831154282254
circle,58.087162342354695,45.237296632916355
circle,52.452516612597414,45.44853630183186
circle,54.73570197736124,46.07255119812196
circle,47.59265580410985,43.5640952907748
circle,50.268843833974245,54.21659692227578
circle,57.25069616843617,50.93629689699913
circle,51.97856673094236,43.921042290030144
circle,51.96992959337057,42.74059948724163
circle,63.98880156634803,36.77339175814299
circle,56.256034140295235,41.57723851003736
circle,53.58262603402123,43.05999028753133
circle,49.28948415006948,42.29901849532519
circle,61.01994461496823,47.33505788845204
circle,50.937191222087655,50.778934820414364
circle,54.0413972582652,51.60016879670826
circle,46.70297337624868,49.518019522310006
circle,57.09077934840953,39.69200854442718
circle,55.79278113769325,56.480907864140804
circle,53.940569186581186,52.107029685124054
circle,56.68228180124364,44.46247151244712
circle,54.15906693834598,40.165026445691986
circle,52.19478785158989,49.32843987483118
circle,56.89240197495119,48.12306062623956
circle,56.87993492057266,42.48546893522315
circle,53.593210901780516,48.09639692256538
circle,61.39836498671855,46.92916671899958
circle,52.00393344802327,48.10675611921774
circle,50.32354622052023,48.963769245326084
circle,52.53977913381497,39.04336225979408
circle,53.76565379546456,46.412284530349005
circle,47.47030679172806,47.66432922692739
circle,59.95742384829792,50.606518741378565
circle,56.447708770924635,41.29251637208479
circle,55.90759347568889,48.74224790690039
circle,56.116180953337725,50.11314310854413
circle,48.56087709798474,47.79299594221016
circle,50.968516558299335,47.74957952857589
circle,50.410396292392676,54.56235183570498
circle,50.56411044622263,43.50528921767232
circle,54.29834562617421,42.614504652980024
circle,52.30134679026854,43.85017701231019
circle,59.64468825555845,51.929019134155865
circle,53.77012192613842,46.04391473912992
circle,57.76367045830045,50.02602040161444
circle,55.99220961873143,47.86903881327875
circle,47.24707988891427,47.437670087202235
circle,51.87004032316373,51.22527636729939
circle,58.04736973197676,44.71024477619416
circle,55.87354093710821,56.67939498842857
circle,51.32914911771776,56.40960870036569
circle,50.58965661131741,48.114798778039756
circle,53.95541755263263,48.05749449935031
circle,50.649287832368216,45.04408448117785
circle,51.3291188545624,49.634812977022285
circle,55.320140458041244,59.129721658691295
circle,59.48395602516449,45.45078378930761
circle,55.29947846318474,52.9855899039588
circle,54.12240729735407,47.651792107765324
circle,54.20629977107956,52.33638314404214
circle,50.60671790877907,46.371652655899126
circle,55.881732579385925,49.55723535777078
circle,54.958331039425566,46.150994168480985
circle,57.89014315657015,58.66934123602269
circle,55.625661974704826,47.0341164922508
circle,57.02296115557817,45.30433681623505
circle,51.00123862118841,48.16404373806856
circle,43.66881346812022,42.23024818173995
bullseye,52.60815257631662,41.233055709261436
bullseye,49.86942759243148,45.81625831270996
bullseye,47.42813882587756,45.96964101870084
bullseye,51.85891934186862,48.12702604192508
bullseye,58.616736121976075,48.86252208246433
bullseye,54.08424808536825,48.49727228515356
bullseye,54.90957110048451,42.91630675695116
bullseye,53.540898700916806,49.543756209183066
bullseye,48.51696006982776,52.328264614079266
bullseye,58.32550413783276,44.843120057753055
bullseye,53.03464883418758,43.60904828594963
bullseye,56.797521934351266,42.693888562770454
bullseye,53.11009195902832,43.70540046096164
bullseye,54.20381710804516,39.0288536425514
bullseye,59.30584656946277,43.17696787896868
bullseye,54.239578732465105,46.937384776148456
bullseye,50.95154195231001,43.561099316820325
bullseye,57.74559417420984,45.3717970344903
bullseye,55.084680738549245,43.99251557302813
bullseye,51.411274272608956,50.36123575043924
bullseye,57.721633984444466,47.1234179883649
bullseye,48.31232051343825,57.98978039875002
bullseye,50.573802767063164,40.29206300164106
bullseye,64.37769835107261,45.97983853074531
bullseye,48.15306924695437,44.58280116605659
bullseye,55.470179584089074,48.9515577879697
bullseye,50.59308321098564,57.113618000639796
bullseye,51.91368128802651,42.07656704794224
bullseye,57.84277359384264,54.64531423269841
bullseye,50.73002760953509,40.99326353480345
bullseye,58.12175306966242,37.76338193471019
bullseye,49.09351336241354,52.83723075025117
bullseye,53.77858980791044,46.68031325703656
bullseye,55.411266425977516,47.23612788227295
bullseye,48.80525311134764,54.38037673003094
bullseye,59.30005621154726,49.0266628188898
bullseye,54.18053606172804,59.69812403010029
bullseye,52.89426861993665,46.7021150908298
bullseye,55.45792499696202,55.35660978071169
bullseye,60.37108301766993,46.70713684161614
bullseye,55.23332498383738,41.109582664679614
bullseye,52.75209556833515,43.79854802260347
bullseye,49.972672990804064,56.407883581336634
bullseye,50.83085335277542,45.34197045994273
bullseye,55.463395151667434,54.489126334867926
bullseye,55.924460505555345,61.79677557010791
bullseye,53.7013280869954,49.29358220114873
bullseye,55.10240269593623,55.17524693394577
bullseye,56.02895580443846,47.41885149806426
bullseye,50.21004562037248,49.22221727980816
bullseye,59.60537932516784,45.948091031710646
bullseye,56.11577447366633,49.23073894342422
bullseye,57.454078633525256,43.97623129681065
bullseye,63.38658812210625,41.60419446485871
bullseye,52.53779564005585,52.69046270452823
bullseye,55.18693268997645,52.14993079540552
bullseye,52.01559066406975,47.625975086598295
bullseye,54.048927934555266,55.846298072679524
bullseye,56.761716097532194,51.98336054182336
bullseye,51.36829562985282,52.84441319281525
bullseye,54.902326654275214,54.94572657838964
bullseye,62.05624061967387,46.466171119864974
bullseye,52.37478747821975,43.67977504448815
bullseye,53.42568195313472,46.08987275524808
bullseye,55.43801759828404,47.277165915331324
bullseye,52.55360287687352,53.32292568063926
bullseye,50.248479075394194,50.16553976575672
bullseye,52.376233091246064,51.62184252434982
bullseye,59.54104618688892,46.48450873287964
bullseye,55.7641316290926,48.8939643286659
bullseye,50.80231040182761,49.20393754870969
bullseye,55.15648202111525,50.06435410223087
bullseye,53.20640441271982,48.47096150155073
bullseye,49.40955622062746,46.209429622600716
bullseye,56.223850718839195,52.46236943665765
bullseye,52.310740703498915,48.523570147166424
bullseye,54.91221330049627,49.00739973352216
bullseye,56.163094341201564,38.90961184808215
bullseye,53.802703719409706,49.19516800623382
bullseye,49.99867860418518,56.369928535053546
bullseye,54.64623706895267,55.81702372514465
bullseye,50.83790791266769,43.463499392373365
bullseye,54.897008883862725,39.60655818585671
bullseye,54.85986236250437,48.48609616001459
bullseye,58.06266112616841,51.505206705825486
bullseye,52.33009060045986,42.512516726148775
bullseye,60.849220885373185,44.03942489717425
bullseye,49.817901771722134,42.57571970269277
bullseye,58.46922126214666,45.40549897787574
bullseye,50.9851821352776,48.688449129516734
bullseye,53.172221157770835,44.609522696068765
bullseye,57.01596586791392,53.32657746048997
bullseye,57.941270035759466,51.834598348305995
bullseye,55.61050212452014,39.120560000190856
bullseye,60.677003225507875,49.509946051787765
bullseye,56.43262571040257,53.57481161473701
bullseye,59.733410011527965,50.09199005654596
bullseye,55.74218463718262,45.0038786127014
bullseye,54.13235900455504,43.72919369592823
bullseye,51.12023787143242,43.532127988429046
bullseye,53.37590443601085,53.24546593960005
bullseye,66.68389909316072,48.94749818773957
bullseye,48.60634764897552,54.32491664928128
bullseye,52.79686449409589,44.69695703011504
bullseye,54.839397911692224,41.796877002218864
bullseye,54.88985265603443,47.55812238400423
bullseye,54.3935116272619,49.907081271045854
bullseye,54.269969028963224,48.08169042056446
bullseye,55.13725807599178,50.07700313085564
bullseye,49.87407015873897,40.850043706572755
bullseye,53.75344779131114,40.836322550329456
bullseye,54.35012588368669,52.69373437841155
bullseye,56.428446687664184,42.759147965872636
bullseye,50.558950192169924,49.64150647500378
bullseye,52.39480877946596,46.41672352473947
bullseye,56.387625924991816,43.06356653271174
bullseye,52.39506116035527,43.99958761957699
bullseye,49.82748200785858,43.71460905664418
bullseye,56.709848677385644,48.259101947412105
bullseye,50.4833574846597,46.844491962035015
bullseye,47.44477077151129,44.33343596208167
bullseye,62.59829813946915,47.548780751671245
bullseye,56.92663570812157,47.672558124277586
bullseye,55.392676940967235,51.31629044839587
bullseye,49.581533609894116,47.845318713630206
bullseye,60.315460777665955,44.02249724973355
bullseye,51.73424058507112,46.46154361316499
bullseye,55.07609629270499,50.62458931822941
bullseye,59.06964661927463,50.4974911673433
bullseye,53.751787496806664,54.295835648054066
bullseye,56.81644408856633,40.52160241871492
bullseye,64.105472961424,56.84960694098367
bullseye,53.3271431089317,49.88955050869237
bullseye,59.29743499998336,47.138996036515586
bullseye,56.92140716150808,53.52289236785738
bullseye,49.94069636906224,44.98834073208569
bullseye,57.68563359124232,50.30407238574415
bullseye,57.69518624125579,47.33715992673141
bullseye,52.84397915618307,38.006802622083136
bullseye,49.415998293970226,48.23533047329247
bullseye,57.29822887825694,50.65589183267848
bullseye,53.48703210388919,46.64114216754652
1 dataset x y
2 dino 61.05620938387066 50.000786041836115
3 dino 57.91495193642296 59.204465996007286
4 dino 61.47023196059987 43.113610600617946
5 dino 57.80035367010236 47.24321395851151
6 dino 53.58712459282577 50.05299250969186
7 dino 54.576174284643514 55.27136753481488
8 dino 57.04415090058797 48.60837508246414
9 dino 55.7754529309817 49.668371636871335
10 dino 59.97631629263042 46.97420868117099
11 dino 55.252270806603605 43.729521303491374
12 dino 43.78804073666369 51.2680929772018
13 dino 57.457744795438025 44.28917489796779
14 dino 63.07901849595043 40.728171627006176
15 dino 54.18303406920578 47.06408074987083
16 dino 60.13111685743383 55.346793849501424
17 dino 54.61978970278766 49.89081259801087
18 dino 50.448857009479546 38.09601765888036
19 dino 52.60835140269539 48.7817448455199
20 dino 58.921162722910886 54.01189924392206
21 dino 52.45069273036819 46.48848624712332
22 dino 49.80578813973163 40.89991031410513
23 dino 47.17491923749995 57.75387697615895
24 dino 51.96139127299339 45.80962849194407
25 dino 48.98881855980029 51.88745177915955
26 dino 47.5444086097682 46.93629859893016
27 dino 50.4181337552253 49.93451248929631
28 dino 51.95677944972451 42.09683907938794
29 dino 53.88727108664538 50.14165935265209
30 dino 54.26606888953267 49.512359488698905
31 dino 51.46271162527614 46.18629417006431
32 dino 51.3101582088962 46.2022341922973
33 dino 50.74741487182219 39.36858698834162
34 dino 54.70970456901501 45.99109531895869
35 dino 47.47920661213582 50.31391127762887
36 dino 50.37080654246703 48.259726978980694
37 dino 56.91636224871015 48.64491455378705
38 dino 58.5576027381732 41.825870898231734
39 dino 55.60936656471019 44.575949545298435
40 dino 50.51681140327247 45.105751676177924
41 dino 52.75378987149051 48.28082671114873
42 dino 49.33940063686657 52.50413243477094
43 dino 55.86264975892184 40.31878156861388
44 dino 59.9530087751824 57.479445880152916
45 dino 58.715118284638606 47.10037582093825
46 dino 49.71698951395783 53.272258634655685
47 dino 52.38729221210728 54.11222535191214
48 dino 54.83309991230744 52.88319518241856
49 dino 55.425465588697605 51.532865840959744
50 dino 54.042000082883284 56.92935246952918
51 dino 54.50764837081448 50.00994681722351
52 dino 61.53260278822502 41.26120469428777
53 dino 48.91806000605707 52.846983540790056
54 dino 49.30750637954336 57.71810592824646
55 dino 52.34552407696101 44.26272594279621
56 dino 61.69176810592154 55.40257395717212
57 dino 61.47023584170628 52.53022329137693
58 dino 50.55509725978119 57.55032476549517
59 dino 52.92798651619448 52.012281978981974
60 dino 57.789007871094995 47.22494953454583
61 dino 56.45631748138432 52.61103335783263
62 dino 55.505702124622516 42.50299604707903
63 dino 55.19295269682422 54.63192948343515
64 dino 51.221728561074535 47.25182729836165
65 dino 52.25938579311345 57.24631864239671
66 dino 56.68917902804974 50.03730918120555
67 dino 50.920335702218736 50.69624595645909
68 dino 51.3026693573705 48.15915279137175
69 dino 51.456615686484476 51.3821664747325
70 dino 56.30636326645976 46.958506222110024
71 dino 55.58402685064658 42.53469245634747
72 dino 48.03496962917758 50.19695850632269
73 dino 54.66669398149011 51.17515718446053
74 dino 63.532579099455766 52.72239743495207
75 dino 50.34871109822337 53.585081440479264
76 dino 48.736370357953916 45.692076975926454
77 dino 53.72703357870147 56.56671360824683
78 dino 51.02098071180624 43.867807306704925
79 dino 53.60618990229827 44.682608568189465
80 dino 58.50654368842603 42.60034245818288
81 dino 49.41012539035559 45.81089977627783
82 dino 52.00787019723078 57.64766026908493
83 dino 57.79768322770305 48.437756206925954
84 dino 49.09825792467933 52.221814882007735
85 dino 49.99913861044174 40.27614451611194
86 dino 58.752119169409205 49.58471305962425
87 dino 57.683435295123274 49.5936382647151
88 dino 57.42732244761076 44.74487203349926
89 dino 49.86302863286214 51.40797259140813
90 dino 50.786361343304634 44.552251111249
91 dino 52.17786998593063 48.08739579512528
92 dino 52.58402435498606 41.125243532909906
93 dino 51.42552638866844 36.88298423887787
94 dino 56.500925804108746 39.98971172196626
95 dino 49.5824666422862 48.260825396304874
96 dino 51.041748014434745 55.71507297703368
97 dino 48.828572361106204 49.33525434674591
98 dino 53.842868727090014 42.15953251129401
99 dino 56.09310664212701 47.14226834388876
100 dino 57.08716220485467 52.117520769818654
101 dino 62.65294379712276 54.68263974718196
102 dino 52.523272648230225 46.803104112120366
103 dino 58.39863838354845 51.27631865361299
104 dino 56.56052610439037 39.91521977844583
105 dino 53.902695502404256 44.309845453971555
106 dino 55.11969839617295 47.50924805178521
107 dino 57.640715632370366 49.58609107595651
108 dino 57.1453118484359 45.667904516320284
109 dino 50.222214976326995 45.94975153398726
110 dino 53.93191834455424 49.89575867777541
111 dino 63.037235802763405 47.78871424169679
112 dino 50.17621999802889 46.27009112150307
113 dino 52.14561610141563 50.40740736886731
114 dino 47.8368119422215 48.31630997100166
115 dino 54.6260261518615 49.16090518100138
116 dino 51.61073572413855 46.810391351319964
117 dino 48.303756364069876 45.53340058318903
118 dino 51.82855409593313 50.080250231307126
119 dino 49.37527027271235 51.90599050854997
120 dino 59.97793817796548 37.650074874932336
121 dino 55.70503492311241 51.38454017515123
122 dino 51.450251897791084 46.01364092835601
123 dino 53.468477689652175 46.51104560299136
124 dino 52.76394812381151 39.61998096835012
125 dino 58.60932625913248 53.39809296018411
126 dino 50.74654296318319 40.66787836098743
127 dino 56.08425950581103 45.12106015093467
128 dino 54.56781265328312 46.40335791427452
129 dino 56.76615500428075 51.47374571828003
130 dino 51.09761048614566 41.083180223024726
131 dino 47.66824641065967 51.051896895536025
132 dino 49.24456296886388 45.46591822850656
133 dino 51.61474384619797 47.73716351865227
134 dino 46.25488077661397 48.94389298396914
135 dino 56.095564095336826 48.442110435223306
136 dino 52.75645531320611 48.48700083134392
137 dino 55.59618538256052 34.13703621786675
138 dino 61.823649233002776 49.95046661343963
139 dino 51.39036567045192 46.045233124061994
140 dino 55.97496710939675 47.41948030482817
141 dino 45.87726212887402 58.3224643067966
142 dino 53.55783737107011 53.100863558579
143 dino 51.231800608862436 55.68188527122899
144 star 55.145374755569115 51.044219172377254
145 star 49.81898653541218 54.0557264484135
146 star 56.75927265813915 54.50923114782499
147 star 51.48764976143369 45.594864407696065
148 star 63.21566679073577 42.69992088639226
149 star 53.45620119728672 53.684456813013476
150 star 54.39089987085942 50.91476839876647
151 star 52.4022038829485 49.850279439237596
152 star 48.77389259305873 56.29065339809094
153 star 53.52734381948572 44.59910898001575
154 star 56.665532328127654 45.69640106305723
155 star 48.662966114388986 41.266412471012224
156 star 56.77509261076053 47.20213280926867
157 star 53.46519376132624 53.388719029881315
158 star 49.492696764973026 44.34661123567587
159 star 52.4604807632749 48.47175794658537
160 star 53.83131419483768 46.56556403805046
161 star 53.75349439161741 47.46347361854413
162 star 51.121582445792825 43.93503505722961
163 star 55.09806543089576 43.54542458502236
164 star 49.37057896323658 46.438538744371535
165 star 53.36933193534474 59.283617486491046
166 star 51.18119889657507 52.71630362484748
167 star 56.988753336818526 42.05527522398132
168 star 57.0930119096104 42.080596799033415
169 star 43.36331104801304 51.031597621796905
170 star 46.976437666249126 50.25467230902957
171 star 51.26395640905113 56.29775398094936
172 star 58.274037597264034 45.733070980743065
173 star 51.24864955588527 41.9296129845294
174 star 52.23630947082963 46.59822252407746
175 star 52.54122582243326 48.78351927636182
176 star 56.314085990915515 49.74827228496587
177 star 50.943424304374226 40.811042630992105
178 star 59.458127392409885 44.55275407725031
179 star 51.39082560025992 45.394053438494446
180 star 46.627721799373404 45.61012997979757
181 star 52.08137674396821 51.10179149171756
182 star 56.79382859642934 48.018854445431344
183 star 57.72739349645722 49.69982491900631
184 star 53.93727155358978 48.804640841491114
185 star 53.237386025674404 46.02575242983275
186 star 52.92906585242413 42.35994334264996
187 star 55.12176682126518 43.034381945352095
188 star 57.36652505629455 46.752707099195256
189 star 54.19797992660036 50.46918388140478
190 star 56.573257860251715 40.146882956832734
191 star 53.172385295344114 52.40089456040391
192 star 47.207576722270986 49.936402376975316
193 star 44.97774308239124 42.88746578182198
194 star 54.15452220736075 39.71642448839023
195 star 50.0579570492634 40.64082496268207
196 star 60.59253972883024 48.82113877743667
197 star 56.269161111410675 46.88662449742423
198 star 52.586273004971204 39.917629056744836
199 star 52.83265054900855 44.19253894094189
200 star 57.431695697169346 53.705509333287864
201 star 59.86631486229671 52.26275969730616
202 star 51.605384252308056 42.42051507019803
203 star 57.066652726580344 49.78146408736144
204 star 46.925846197291875 49.77740896371885
205 star 57.25807928995147 48.29462794590815
206 star 53.25978531596263 43.96175756191822
207 star 48.21386120174645 52.00148974670014
208 star 52.763542220913166 46.832666692281535
209 star 60.93088474767653 51.42250553429595
210 star 55.483300005124406 48.710309025936176
211 star 60.07997944306309 56.59794653708097
212 star 57.718020445918114 50.91112295698962
213 star 45.62158771517542 48.61860957116753
214 star 53.47957218322519 48.469766146927846
215 star 57.77218434929007 34.30161416405222
216 star 51.72275178611926 49.34952177470381
217 star 52.13261781578895 40.9154694343687
218 star 57.47585394758718 49.38435952923064
219 star 50.11558171822206 49.57408602257912
220 star 57.28634284819918 48.026463231496805
221 star 57.202259213723984 48.391300875808305
222 star 52.419084069382585 42.20289741800043
223 star 53.65627693211355 48.971464690228856
224 star 57.503331046349324 47.424462657563865
225 star 55.82966242488396 43.17693993133136
226 star 50.8694833766899 47.44805350486556
227 star 49.78148614405995 52.1012391866234
228 star 55.85252131727443 49.395478821962264
229 star 55.355616500863775 58.10521780742398
230 star 52.12454324813282 36.99279357249721
231 star 54.79720078758586 47.74698229519167
232 star 51.92992382995838 43.10585070320615
233 star 52.24324191279141 48.90669214608911
234 star 51.988733197429845 60.06226839771874
235 star 50.157982473467406 44.03441318646164
236 star 44.845519839941886 49.257422075107684
237 star 45.93437348880096 45.302726833127494
238 star 52.89731786175777 44.45136017076556
239 star 60.955490709818044 52.97197195657749
240 star 59.2765475052063 43.58790590725041
241 star 58.51437625805828 50.48000473171981
242 star 57.08562379470738 53.147194143913836
243 star 50.36494701616379 45.878411895110496
244 star 57.450384045313804 34.72190453512533
245 star 60.05331233029282 50.76566032103792
246 star 53.81718415735906 49.102538278785865
247 star 49.880258866764095 46.25028317705448
248 star 58.40113735288149 54.490109861631105
249 star 64.78489621025432 47.630376668597926
250 star 51.36578813277998 45.42883017030005
251 star 49.92783249885054 47.610726220295746
252 star 55.53092972004907 47.82878859734023
253 star 58.3853873826632 46.828920993277315
254 star 52.610197390005744 45.093657615698376
255 star 47.46946189506202 40.16116137845773
256 star 49.28336827744925 54.5071403583238
257 star 57.58104109155972 54.87482033196495
258 star 48.67115338162199 38.1568765510699
259 star 51.359774719463665 48.87909476648014
260 star 55.99476109963931 53.239860779840264
261 star 55.137118683228856 56.71334390327816
262 star 53.10957727620672 43.43460390979102
263 star 47.27512713802226 43.555143209522754
264 star 54.96847184394049 43.55639871323184
265 star 57.74696985414103 55.06163853018722
266 star 44.52165237909359 52.320261502488236
267 star 45.04158376535305 50.00749527545144
268 star 58.89948225677464 48.32428053171788
269 star 48.88124330718304 45.07284397861114
270 star 52.9534182171564 47.088776081050284
271 star 53.18841263693333 47.45058610345343
272 star 54.853920195640676 41.957131731333895
273 star 53.03192068051912 55.59130585177853
274 star 52.46141830742993 45.78081953422401
275 star 58.31278921485695 35.20407666827952
276 star 58.72551440515314 44.840481209974165
277 star 54.65571428981035 48.48160677960598
278 star 57.769872476881574 46.66202626882326
279 star 51.2878968737422 54.48922895325549
280 star 44.54330473143528 48.101670908526216
281 star 48.608298309483516 44.192133058717204
282 star 62.04502672585255 47.77702286772072
283 star 54.78027878860552 39.09218572147204
284 star 51.08382136482122 48.98278700364393
285 star 55.41903077245287 51.08443277196639
286 circle 54.0345115956703 50.6350210422733
287 circle 55.815127650542735 38.85129794497735
288 circle 54.1480228876406 51.83951203866352
289 circle 56.35951928293808 46.18070595014605
290 circle 50.77749396984253 42.40844037839184
291 circle 53.47578395383435 53.66539939779861
292 circle 46.19278359407359 44.70054135135251
293 circle 49.4407901782929 51.9247876062025
294 circle 51.782761493714794 45.64681170922604
295 circle 53.132201720253406 50.226966254473986
296 circle 52.430444007401455 32.769284726000365
297 circle 56.17324756555008 50.19521478836021
298 circle 53.12183588667515 42.579816896640324
299 circle 55.407120442725436 49.89617766767793
300 circle 52.119868469196504 46.916342647122306
301 circle 50.279373989902716 47.10705453956336
302 circle 47.798282619666075 50.08659410515918
303 circle 50.222526036702824 49.19051573916156
304 circle 48.3761483349284 45.0497117706523
305 circle 53.558042379736285 39.696500940653685
306 circle 54.46059149256037 46.10426218560038
307 circle 47.030575208763075 41.48378622943842
308 circle 56.420480336328666 52.47777992775662
309 circle 53.47236544088013 50.02380906020249
310 circle 54.895374253251646 49.64811491063869
311 circle 59.14393602832117 40.46500800892864
312 circle 56.70584292944649 46.0899552221109
313 circle 53.10296426299344 46.48875134772465
314 circle 52.499411533354866 41.869019041084904
315 circle 54.7333567970304 56.354715163944284
316 circle 53.775467918204974 47.99307478632452
317 circle 51.25080385137335 47.41262726790944
318 circle 55.86466570413612 46.14878779647829
319 circle 52.184783835792 50.01632270081623
320 circle 50.32798092072382 49.262483135384365
321 circle 57.281287189045685 54.799742708397424
322 circle 53.63847197089221 54.837986199033566
323 circle 58.13763954592607 43.01893679814467
324 circle 49.1282459536274 46.4751818107279
325 circle 58.11574197037942 47.63856496219998
326 circle 51.59736976936885 55.7612159002428
327 circle 55.147617952013384 36.39702862104629
328 circle 55.26864250517075 50.60020307285434
329 circle 54.90243461788442 50.248560501159965
330 circle 53.73089756430807 41.408020651776326
331 circle 52.51718398711819 43.27192102221854
332 circle 50.26903635682249 41.684658254488625
333 circle 55.80995637055859 48.48948072706314
334 circle 52.20733854927717 44.753310361348056
335 circle 53.90630757991419 53.395973640562445
336 circle 45.98313713800434 49.88438260425446
337 circle 51.81715210392887 38.57707077510276
338 circle 46.21718766734416 43.43608252932354
339 circle 54.87803822317218 49.96531466990046
340 circle 50.244073709288855 53.08510495706622
341 circle 59.69193398606443 49.980432924782804
342 circle 51.634389328767554 53.62209592255184
343 circle 57.021582782653354 52.33703705677459
344 circle 51.374145300113874 33.827227473626486
345 circle 62.4671640859347 39.94560798275033
346 circle 53.85692771255911 59.90372675609875
347 circle 55.322307025097494 52.74623236779118
348 circle 47.99041372247315 39.11166522633147
349 circle 51.86918883208178 53.4537486721725
350 circle 52.61500220941076 44.026818394642504
351 circle 54.79186915977987 53.40967609238633
352 circle 48.22023920370651 41.94728502938324
353 circle 50.84532298196253 53.47319187356046
354 circle 54.93928610379493 58.66076705285222
355 circle 57.74578290332446 47.82452411565165
356 circle 59.060311352355065 49.05748506365939
357 circle 51.18031458997022 51.39987422122551
358 circle 51.214693384555666 46.54801449598067
359 circle 59.311130783831935 47.493592568913
360 circle 50.787434450633484 45.67831154282254
361 circle 58.087162342354695 45.237296632916355
362 circle 52.452516612597414 45.44853630183186
363 circle 54.73570197736124 46.07255119812196
364 circle 47.59265580410985 43.5640952907748
365 circle 50.268843833974245 54.21659692227578
366 circle 57.25069616843617 50.93629689699913
367 circle 51.97856673094236 43.921042290030144
368 circle 51.96992959337057 42.74059948724163
369 circle 63.98880156634803 36.77339175814299
370 circle 56.256034140295235 41.57723851003736
371 circle 53.58262603402123 43.05999028753133
372 circle 49.28948415006948 42.29901849532519
373 circle 61.01994461496823 47.33505788845204
374 circle 50.937191222087655 50.778934820414364
375 circle 54.0413972582652 51.60016879670826
376 circle 46.70297337624868 49.518019522310006
377 circle 57.09077934840953 39.69200854442718
378 circle 55.79278113769325 56.480907864140804
379 circle 53.940569186581186 52.107029685124054
380 circle 56.68228180124364 44.46247151244712
381 circle 54.15906693834598 40.165026445691986
382 circle 52.19478785158989 49.32843987483118
383 circle 56.89240197495119 48.12306062623956
384 circle 56.87993492057266 42.48546893522315
385 circle 53.593210901780516 48.09639692256538
386 circle 61.39836498671855 46.92916671899958
387 circle 52.00393344802327 48.10675611921774
388 circle 50.32354622052023 48.963769245326084
389 circle 52.53977913381497 39.04336225979408
390 circle 53.76565379546456 46.412284530349005
391 circle 47.47030679172806 47.66432922692739
392 circle 59.95742384829792 50.606518741378565
393 circle 56.447708770924635 41.29251637208479
394 circle 55.90759347568889 48.74224790690039
395 circle 56.116180953337725 50.11314310854413
396 circle 48.56087709798474 47.79299594221016
397 circle 50.968516558299335 47.74957952857589
398 circle 50.410396292392676 54.56235183570498
399 circle 50.56411044622263 43.50528921767232
400 circle 54.29834562617421 42.614504652980024
401 circle 52.30134679026854 43.85017701231019
402 circle 59.64468825555845 51.929019134155865
403 circle 53.77012192613842 46.04391473912992
404 circle 57.76367045830045 50.02602040161444
405 circle 55.99220961873143 47.86903881327875
406 circle 47.24707988891427 47.437670087202235
407 circle 51.87004032316373 51.22527636729939
408 circle 58.04736973197676 44.71024477619416
409 circle 55.87354093710821 56.67939498842857
410 circle 51.32914911771776 56.40960870036569
411 circle 50.58965661131741 48.114798778039756
412 circle 53.95541755263263 48.05749449935031
413 circle 50.649287832368216 45.04408448117785
414 circle 51.3291188545624 49.634812977022285
415 circle 55.320140458041244 59.129721658691295
416 circle 59.48395602516449 45.45078378930761
417 circle 55.29947846318474 52.9855899039588
418 circle 54.12240729735407 47.651792107765324
419 circle 54.20629977107956 52.33638314404214
420 circle 50.60671790877907 46.371652655899126
421 circle 55.881732579385925 49.55723535777078
422 circle 54.958331039425566 46.150994168480985
423 circle 57.89014315657015 58.66934123602269
424 circle 55.625661974704826 47.0341164922508
425 circle 57.02296115557817 45.30433681623505
426 circle 51.00123862118841 48.16404373806856
427 circle 43.66881346812022 42.23024818173995
428 bullseye 52.60815257631662 41.233055709261436
429 bullseye 49.86942759243148 45.81625831270996
430 bullseye 47.42813882587756 45.96964101870084
431 bullseye 51.85891934186862 48.12702604192508
432 bullseye 58.616736121976075 48.86252208246433
433 bullseye 54.08424808536825 48.49727228515356
434 bullseye 54.90957110048451 42.91630675695116
435 bullseye 53.540898700916806 49.543756209183066
436 bullseye 48.51696006982776 52.328264614079266
437 bullseye 58.32550413783276 44.843120057753055
438 bullseye 53.03464883418758 43.60904828594963
439 bullseye 56.797521934351266 42.693888562770454
440 bullseye 53.11009195902832 43.70540046096164
441 bullseye 54.20381710804516 39.0288536425514
442 bullseye 59.30584656946277 43.17696787896868
443 bullseye 54.239578732465105 46.937384776148456
444 bullseye 50.95154195231001 43.561099316820325
445 bullseye 57.74559417420984 45.3717970344903
446 bullseye 55.084680738549245 43.99251557302813
447 bullseye 51.411274272608956 50.36123575043924
448 bullseye 57.721633984444466 47.1234179883649
449 bullseye 48.31232051343825 57.98978039875002
450 bullseye 50.573802767063164 40.29206300164106
451 bullseye 64.37769835107261 45.97983853074531
452 bullseye 48.15306924695437 44.58280116605659
453 bullseye 55.470179584089074 48.9515577879697
454 bullseye 50.59308321098564 57.113618000639796
455 bullseye 51.91368128802651 42.07656704794224
456 bullseye 57.84277359384264 54.64531423269841
457 bullseye 50.73002760953509 40.99326353480345
458 bullseye 58.12175306966242 37.76338193471019
459 bullseye 49.09351336241354 52.83723075025117
460 bullseye 53.77858980791044 46.68031325703656
461 bullseye 55.411266425977516 47.23612788227295
462 bullseye 48.80525311134764 54.38037673003094
463 bullseye 59.30005621154726 49.0266628188898
464 bullseye 54.18053606172804 59.69812403010029
465 bullseye 52.89426861993665 46.7021150908298
466 bullseye 55.45792499696202 55.35660978071169
467 bullseye 60.37108301766993 46.70713684161614
468 bullseye 55.23332498383738 41.109582664679614
469 bullseye 52.75209556833515 43.79854802260347
470 bullseye 49.972672990804064 56.407883581336634
471 bullseye 50.83085335277542 45.34197045994273
472 bullseye 55.463395151667434 54.489126334867926
473 bullseye 55.924460505555345 61.79677557010791
474 bullseye 53.7013280869954 49.29358220114873
475 bullseye 55.10240269593623 55.17524693394577
476 bullseye 56.02895580443846 47.41885149806426
477 bullseye 50.21004562037248 49.22221727980816
478 bullseye 59.60537932516784 45.948091031710646
479 bullseye 56.11577447366633 49.23073894342422
480 bullseye 57.454078633525256 43.97623129681065
481 bullseye 63.38658812210625 41.60419446485871
482 bullseye 52.53779564005585 52.69046270452823
483 bullseye 55.18693268997645 52.14993079540552
484 bullseye 52.01559066406975 47.625975086598295
485 bullseye 54.048927934555266 55.846298072679524
486 bullseye 56.761716097532194 51.98336054182336
487 bullseye 51.36829562985282 52.84441319281525
488 bullseye 54.902326654275214 54.94572657838964
489 bullseye 62.05624061967387 46.466171119864974
490 bullseye 52.37478747821975 43.67977504448815
491 bullseye 53.42568195313472 46.08987275524808
492 bullseye 55.43801759828404 47.277165915331324
493 bullseye 52.55360287687352 53.32292568063926
494 bullseye 50.248479075394194 50.16553976575672
495 bullseye 52.376233091246064 51.62184252434982
496 bullseye 59.54104618688892 46.48450873287964
497 bullseye 55.7641316290926 48.8939643286659
498 bullseye 50.80231040182761 49.20393754870969
499 bullseye 55.15648202111525 50.06435410223087
500 bullseye 53.20640441271982 48.47096150155073
501 bullseye 49.40955622062746 46.209429622600716
502 bullseye 56.223850718839195 52.46236943665765
503 bullseye 52.310740703498915 48.523570147166424
504 bullseye 54.91221330049627 49.00739973352216
505 bullseye 56.163094341201564 38.90961184808215
506 bullseye 53.802703719409706 49.19516800623382
507 bullseye 49.99867860418518 56.369928535053546
508 bullseye 54.64623706895267 55.81702372514465
509 bullseye 50.83790791266769 43.463499392373365
510 bullseye 54.897008883862725 39.60655818585671
511 bullseye 54.85986236250437 48.48609616001459
512 bullseye 58.06266112616841 51.505206705825486
513 bullseye 52.33009060045986 42.512516726148775
514 bullseye 60.849220885373185 44.03942489717425
515 bullseye 49.817901771722134 42.57571970269277
516 bullseye 58.46922126214666 45.40549897787574
517 bullseye 50.9851821352776 48.688449129516734
518 bullseye 53.172221157770835 44.609522696068765
519 bullseye 57.01596586791392 53.32657746048997
520 bullseye 57.941270035759466 51.834598348305995
521 bullseye 55.61050212452014 39.120560000190856
522 bullseye 60.677003225507875 49.509946051787765
523 bullseye 56.43262571040257 53.57481161473701
524 bullseye 59.733410011527965 50.09199005654596
525 bullseye 55.74218463718262 45.0038786127014
526 bullseye 54.13235900455504 43.72919369592823
527 bullseye 51.12023787143242 43.532127988429046
528 bullseye 53.37590443601085 53.24546593960005
529 bullseye 66.68389909316072 48.94749818773957
530 bullseye 48.60634764897552 54.32491664928128
531 bullseye 52.79686449409589 44.69695703011504
532 bullseye 54.839397911692224 41.796877002218864
533 bullseye 54.88985265603443 47.55812238400423
534 bullseye 54.3935116272619 49.907081271045854
535 bullseye 54.269969028963224 48.08169042056446
536 bullseye 55.13725807599178 50.07700313085564
537 bullseye 49.87407015873897 40.850043706572755
538 bullseye 53.75344779131114 40.836322550329456
539 bullseye 54.35012588368669 52.69373437841155
540 bullseye 56.428446687664184 42.759147965872636
541 bullseye 50.558950192169924 49.64150647500378
542 bullseye 52.39480877946596 46.41672352473947
543 bullseye 56.387625924991816 43.06356653271174
544 bullseye 52.39506116035527 43.99958761957699
545 bullseye 49.82748200785858 43.71460905664418
546 bullseye 56.709848677385644 48.259101947412105
547 bullseye 50.4833574846597 46.844491962035015
548 bullseye 47.44477077151129 44.33343596208167
549 bullseye 62.59829813946915 47.548780751671245
550 bullseye 56.92663570812157 47.672558124277586
551 bullseye 55.392676940967235 51.31629044839587
552 bullseye 49.581533609894116 47.845318713630206
553 bullseye 60.315460777665955 44.02249724973355
554 bullseye 51.73424058507112 46.46154361316499
555 bullseye 55.07609629270499 50.62458931822941
556 bullseye 59.06964661927463 50.4974911673433
557 bullseye 53.751787496806664 54.295835648054066
558 bullseye 56.81644408856633 40.52160241871492
559 bullseye 64.105472961424 56.84960694098367
560 bullseye 53.3271431089317 49.88955050869237
561 bullseye 59.29743499998336 47.138996036515586
562 bullseye 56.92140716150808 53.52289236785738
563 bullseye 49.94069636906224 44.98834073208569
564 bullseye 57.68563359124232 50.30407238574415
565 bullseye 57.69518624125579 47.33715992673141
566 bullseye 52.84397915618307 38.006802622083136
567 bullseye 49.415998293970226 48.23533047329247
568 bullseye 57.29822887825694 50.65589183267848
569 bullseye 53.48703210388919 46.64114216754652

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

452
git_profile_report.html Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,145 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4e23ea39",
"metadata": {},
"source": [
"\n",
"# 📘 Instructor Version Introductory EDA Lab\n",
"\n",
"## Learning Objectives\n",
"\n",
"Students should:\n",
"\n",
"- Understand what a dataset structure looks like\n",
"- Identify variable types\n",
"- Compute descriptive statistics\n",
"- Recognize the limits of summary statistics\n",
"- Appreciate visualization as a fundamental step in EDA\n"
]
},
{
"cell_type": "markdown",
"id": "38ebe89c",
"metadata": {},
"source": [
"\n",
"## Teaching Strategy\n",
"\n",
"This is NOT a technical coding lab.\n",
"\n",
"It is conceptual:\n",
"- Data structure awareness\n",
"- Reading metadata\n",
"- Interpreting statistics\n",
"- Understanding why visualization matters\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecee2660",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import pyreadr\n",
"import pandas as pd\n",
"import sweetviz as sv\n",
"import dtale\n",
"\n",
"result = pyreadr.read_r(\"datasaurus_dozen.rda\")\n",
"df = list(result.values())[0]\n",
"df.head()\n"
]
},
{
"cell_type": "markdown",
"id": "ca5dfd49",
"metadata": {},
"source": [
"\n",
"## Discussion Prompts\n",
"\n",
"### After df.info():\n",
"- What is categorical?\n",
"- What is numerical?\n",
"- Why does data type matter?\n",
"\n",
"### After df.describe():\n",
"Important insight:\n",
"Different datasets may have nearly identical summary statistics.\n",
"\n",
"Ask:\n",
"Would you trust the numbers without visualization?\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c61c04b",
"metadata": {},
"outputs": [],
"source": [
"\n",
"df.describe()\n"
]
},
{
"cell_type": "markdown",
"id": "5093ed70",
"metadata": {},
"source": [
"\n",
"## Sweetviz Discussion\n",
"\n",
"Use the report to show:\n",
"\n",
"- Similar means and standard deviations\n",
"- Very different visual distributions\n",
"- The importance of scatter plots\n",
"\n",
"Key message:\n",
"📌 \"Statistics describe. Visualization reveals.\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ae6139f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"report = sv.analyze(df)\n",
"report.show_html(\"sweetviz_report.html\")\n"
]
},
{
"cell_type": "markdown",
"id": "d3a3d619",
"metadata": {},
"source": [
"\n",
"## Key Concept to Emphasize\n",
"\n",
"EDA is:\n",
"- Understanding structure\n",
"- Understanding distributions\n",
"- Detecting anomalies\n",
"- Preparing for cleaning\n",
"\n",
"Next lab:\n",
"Students receive messy datasets with:\n",
"- Missing values\n",
"- Wrong types\n",
"- Outliers\n",
"- Inconsistent categories\n"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

13332
sweetviz_report.html Normal file

File diff suppressed because one or more lines are too long

1945
task0_sweetviz_report.html Normal file

File diff suppressed because one or more lines are too long

37
y-prof.py Normal file
View File

@@ -0,0 +1,37 @@
import pandas as pd
import seaborn as sns
from ydata_profiling import ProfileReport
import pyreadr
# Generate a ydata-profiling HTML report for one classroom CSV dataset.
#
# Alternative input (disabled): the Datasaurus Dozen can be profiled instead by
# loading "./data/datasaurus_dozen.rda" with pyreadr.read_r(...) and passing the
# first returned frame to ProfileReport.

# Input CSV and output report location; change these to profile another dataset.
csv_path = 'dataset_D_git_classroom_activity_v2.csv'  # or D/E/F
report_path = "git_profile_report.html"

# dtype=str disables pandas type inference: every column is loaded as text,
# exactly as it appears in the file.
df_raw = pd.read_csv(csv_path, dtype=str)

# Display basic info about the dataset that is actually being profiled.
# (Previously this printed the shape of an unrelated seaborn "penguins" frame.)
print("Dataset shape:", df_raw.shape)
print("\nFirst 5 rows:")
print(df_raw.head())

# Generate and save the profile report.
# NOTE(review): the title mentions "Indie Games Telemetry" while the input file
# is the git classroom activity CSV — confirm the intended title.
profile = ProfileReport(df_raw, title="Indie Games Telemetry Dataset Profile", explorative=True)
profile.to_file(report_path)

# Report the path the file was really written to, so the message cannot drift
# from the actual output location.
print(f"\nProfile report saved as '{report_path}'")
print("Open this file in a web browser to view the detailed analysis.")

452
ydata_profile_report.html Normal file

File diff suppressed because one or more lines are too long