{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Interactive EDA Lab Starter (SweetViz & D‑Tale)\n", "Use this notebook to explore the datasets and practice cleaning up the data-quality issues that EDA commonly surfaces." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Optional installs (uncomment if a package is missing; %pip installs into this kernel's environment):\n", "# %pip install -q sweetviz dtale pandas numpy\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
session_iduser_idstart_timeend_timesession_length_sregionplatformgpu_modelavg_fpsping_msmap_namecrash_flagpurchase_amountparty_sizeinput_methodbuild_versionis_featured_eventdevice_temp_csession_typeis_long_session
0sess_c2fba8e7f37auser_4882025-07-18T18:32:00Z2025-07-18 20:03:21-05:005481.0us-westpcGTX108083.52431.16oceanYes0,002TouchNaNNo85.6rankedTrue
1sess_33d286298cf9user_15112025-06-13 23:21:08+00:002025-06-13 23:36:30+01:00922.0Us-eastPlayStationNaN72.7529.12desertNo0.03TouchNaN062.0casual0
2sess_be2bb4d8986auser_8302025-10-20 02:42:07-05:0020/10/2025 02:49451.0sa-east-1PlayStationNaN69.240.47ForestFalse0.05TOUCH1.4False69.0rankedFalse
3sess_7f425ca9a0e2user_108/01/2025 06:352025-08-01T08:32:45Z7031.0sa-east-1PlayStationNaN33.2992.4DesertNo17.551Controller1.3.2048.1casualTrue
4sess_5657e28b22ecuser_2112025-09-08T23:41:44Z2025-09-09 00:32:59+01:003075.0US-EASTswitchNaN69.9612.63DesertFalse0.02controllrNaN054.7casualYes
\n", "
" ], "text/plain": [ " session_id user_id start_time \\\n", "0 sess_c2fba8e7f37a user_488 2025-07-18T18:32:00Z \n", "1 sess_33d286298cf9 user_1511 2025-06-13 23:21:08+00:00 \n", "2 sess_be2bb4d8986a user_830 2025-10-20 02:42:07-05:00 \n", "3 sess_7f425ca9a0e2 user_1 08/01/2025 06:35 \n", "4 sess_5657e28b22ec user_211 2025-09-08T23:41:44Z \n", "\n", " end_time session_length_s region platform \\\n", "0 2025-07-18 20:03:21-05:00 5481.0 us-west pc \n", "1 2025-06-13 23:36:30+01:00 922.0 Us-east PlayStation \n", "2 20/10/2025 02:49 451.0 sa-east-1 PlayStation \n", "3 2025-08-01T08:32:45Z 7031.0 sa-east-1 PlayStation \n", "4 2025-09-09 00:32:59+01:00 3075.0 US-EAST switch \n", "\n", " gpu_model avg_fps ping_ms map_name crash_flag purchase_amount party_size \\\n", "0 GTX1080 83.52 431.16 ocean Yes 0,00 2 \n", "1 NaN 72.75 29.12 desert No 0.0 3 \n", "2 NaN 69.2 40.47 Forest False 0.0 5 \n", "3 NaN 33.29 92.4 Desert No 17.55 1 \n", "4 NaN 69.96 12.63 Desert False 0.0 2 \n", "\n", " input_method build_version is_featured_event device_temp_c session_type \\\n", "0 Touch NaN No 85.6 ranked \n", "1 Touch NaN 0 62.0 casual \n", "2 TOUCH 1.4 False 69.0 ranked \n", "3 Controller 1.3.2 0 48.1 casual \n", "4 controllr NaN 0 54.7 casual \n", "\n", " is_long_session \n", "0 True \n", "1 0 \n", "2 False \n", "3 True \n", "4 Yes " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "csv_path = 'dataset_A_indie_game_telemetry_v2.csv' # or D/E/F\n", "df_raw = pd.read_csv(csv_path, dtype=str)\n", "df_raw.head()\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "84e6ef6f5e434536a0dcf69111e15ef9", "version_major": 2, "version_minor": 0 }, "text/plain": [ " | | [ 0%] 00:00 -> (? 
left)" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n", "SweetViz report written to sweetviz_report.html\n" ] } ],
 "source": [
  "# Profile the raw frame (every column was loaded as a string) with SweetViz\n",
  "# and save the result as a standalone HTML report.\n",
  "# The import stays in this cell because sweetviz is an optional install;\n",
  "# the other cells keep working without it.\n",
  "import sweetviz as sv\n",
  "\n",
  "# Single source of truth for the output file, so the message below cannot\n",
  "# drift from the path that is actually written.\n",
  "report_path = 'sweetviz_report.html'\n",
  "report_raw = sv.analyze(df_raw)\n",
  "# show_html saves the report; the browser tab may not open on notebook/Colab hosts.\n",
  "report_raw.show_html(report_path)\n",
  "print(f'SweetViz report written to {report_path}')\n"
 ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [],
 "source": [
  "# Start an interactive D-Tale session on the raw frame.\n",
  "# The import stays in this cell because dtale is an optional install.\n",
  "import dtale\n",
  "\n",
  "dtale_session = dtale.show(df_raw)\n",
  "# Ask D-Tale to open the session in a browser (needs a local browser;\n",
  "# presumably a no-op on headless or remote kernels - verify on your host).\n",
  "dtale_session.open_browser()\n"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## EDA Quests\n", "- Parse timestamps\n", "- Fix units (minutes vs seconds)\n", "- Normalize categories & booleans\n", "- Convert comma-decimals to floats\n", "- Identify & treat outliers\n", "- Re-run SweetViz and compare before/after\n" ] } ],
 "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }