diff --git a/stack_overflow_programming_language_analysis/app.py b/stack_overflow_programming_language_analysis/app.py index 801c363..6fa090f 100644 --- a/stack_overflow_programming_language_analysis/app.py +++ b/stack_overflow_programming_language_analysis/app.py @@ -13,7 +13,7 @@ # Load and preprocess data using st.cache st.cache_data(hash_funcs={pd.DataFrame: lambda _: None}) def load_data(): - df = pd.read_csv('TotalQuestions.csv', parse_dates=['Month']) + df = pd.read_csv('stack_overflow_dataset_programming_language.csv', parse_dates=['Month']) df.set_index('Month', inplace=True) return df diff --git a/stack_overflow_programming_language_analysis/stack_overflow_bar_race.ipynb b/stack_overflow_programming_language_analysis/stack_overflow_bar_race.ipynb index 60610ca..b3dd7e5 100644 --- a/stack_overflow_programming_language_analysis/stack_overflow_bar_race.ipynb +++ b/stack_overflow_programming_language_analysis/stack_overflow_bar_race.ipynb @@ -1,135 +1,135 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/", + "height": 971 }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "id": "WhSblK0Tnn6K", + "outputId": "dd5a905b-4ae4-4fcc-e13a-d02d100e51a7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: bar_chart_race in /usr/local/lib/python3.10/dist-packages (0.1.0)\n", + "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.10/dist-packages (from bar_chart_race) (2.0.3)\n", + "Requirement already satisfied: matplotlib>=3.1 in /usr/local/lib/python3.10/dist-packages (from bar_chart_race) (3.7.1)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (4.53.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.4.5)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (24.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->bar_chart_race) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->bar_chart_race) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.1->bar_chart_race) (1.16.0)\n" + ] }, - "language_info": { - "name": "python" - } - }, - "cells": [ { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 971 - }, - "id": "WhSblK0Tnn6K", - "outputId": "dd5a905b-4ae4-4fcc-e13a-d02d100e51a7" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: bar_chart_race in /usr/local/lib/python3.10/dist-packages (0.1.0)\n", - "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.10/dist-packages (from bar_chart_race) (2.0.3)\n", - "Requirement already satisfied: matplotlib>=3.1 in /usr/local/lib/python3.10/dist-packages (from bar_chart_race) (3.7.1)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.2.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (4.53.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.4.5)\n", - "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (1.25.2)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (24.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (9.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (3.1.2)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.1->bar_chart_race) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->bar_chart_race) (2023.4)\n", - "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->bar_chart_race) (2024.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib>=3.1->bar_chart_race) (1.16.0)\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/bar_chart_race/_make_chart.py:286: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " ax.set_yticklabels(self.df_values.columns)\n", - "/usr/local/lib/python3.10/dist-packages/bar_chart_race/_make_chart.py:287: UserWarning: FixedFormatter should only be used together with FixedLocator\n", - " ax.set_xticklabels([max_val] * len(ax.get_xticks()))\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "" - ] - }, - "metadata": {}, - "execution_count": 2 - } + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/bar_chart_race/_make_chart.py:286: UserWarning: FixedFormatter should only be used together with FixedLocator\n", + " ax.set_yticklabels(self.df_values.columns)\n", + "/usr/local/lib/python3.10/dist-packages/bar_chart_race/_make_chart.py:287: UserWarning: FixedFormatter should only be used together with FixedLocator\n", + " ax.set_xticklabels([max_val] * len(ax.get_xticks()))\n" + ] + }, + { + "data": { + "text/html": [ + "" ], - "source": [ - "import sys\n", - "!{sys.executable} -m pip install bar_chart_race\n", - "\n", - "import pandas as pd\n", - "df=pd.read_csv('/content/TotalQuestions.csv')\n", - "import bar_chart_race as bcr\n", - "from IPython.display import Video\n", - "\n", - "# Assuming 'data' is your original DataFrame\n", - "df['Date'] = pd.to_datetime(df['Month']).dt.strftime('%Y-%m')\n", - "data = df.drop('Month', axis=1)\n", - "\n", - "# Make a copy of the DataFrame\n", - "df2 = data.copy()\n", - "df2.index = df2['Date'].tolist()\n", - "df2 = df2.drop('Date', axis=1)\n", - "\n", - "def make_bcr(df):\n", - " bcr.bar_chart_race(\n", - " df=df,\n", - " filename='stack_overflow.mp4',\n", - " orientation='h',\n", - " sort='desc',\n", - " n_bars=15,\n", - " fixed_order=False,\n", - " fixed_max=False,\n", - " steps_per_period=6, # Speed control\n", - " interpolate_period=False,\n", - " label_bars=True,\n", - " bar_size=.95,\n", - " period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},\n", - " period_summary_func=lambda v, r: {'x': .99, 'y': .18, 's': '',\n", - " 'ha': 'right', 'size': 8, 'family': 'Courier New'},\n", - " period_length=400,\n", - " figsize=(10, 6), # Larger figure size\n", - " dpi=100, # Lower DPI for larger size\n", - " cmap='dark24',\n", - " title='Stack Overflow Questions Bar Chart Race',\n", - " title_size=15, # Larger title size\n", - " bar_label_size=10, # Larger bar label size\n", - " tick_label_size=8, # Larger tick label size\n", - " shared_fontdict={'color': '.1'},\n", - " scale='linear',\n", - " writer=None,\n", - " fig=None,\n", - " bar_kwargs={'alpha': .7},\n", - " filter_column_colors=True\n", - " )\n", - "\n", - "# Generate the bar chart race for df2\n", - "make_bcr(df2)\n", - "\n", - "# Display the resulting video\n", - "Video(\"stack_overflow.mp4\", width=900, height=600, embed=True)\n" + "text/plain": [ + "" ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" } - ] -} \ No newline at end of file + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install bar_chart_race\n", + "\n", + "import pandas as pd\n", + "df=pd.read_csv('stack_overflow_dataset_programming_language.csv')\n", + "import bar_chart_race as bcr\n", + "from IPython.display import Video\n", + "\n", + "# Assuming 'data' is your original DataFrame\n", + "df['Date'] = pd.to_datetime(df['Month']).dt.strftime('%Y-%m')\n", + "data = df.drop('Month', axis=1)\n", + "\n", + "# Make a copy of the DataFrame\n", + "df2 = data.copy()\n", + "df2.index = df2['Date'].tolist()\n", + "df2 = df2.drop('Date', axis=1)\n", + "\n", + "def make_bcr(df):\n", + " bcr.bar_chart_race(\n", + " df=df,\n", + " filename='stack_overflow.mp4',\n", + " orientation='h',\n", + " sort='desc',\n", + " n_bars=15,\n", + " fixed_order=False,\n", + " fixed_max=False,\n", + " steps_per_period=6, # Speed control\n", + " interpolate_period=False,\n", + " label_bars=True,\n", + " bar_size=.95,\n", + " period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},\n", + " period_summary_func=lambda v, r: {'x': .99, 'y': .18, 's': '',\n", + " 'ha': 'right', 'size': 8, 'family': 'Courier New'},\n", + " period_length=400,\n", + " figsize=(10, 6), # Larger figure size\n", + " dpi=100, # Lower DPI for larger size\n", + " cmap='dark24',\n", + " title='Stack Overflow Questions Bar Chart Race',\n", + " title_size=15, # Larger title size\n", + " bar_label_size=10, # Larger bar label size\n", + " tick_label_size=8, # Larger tick label size\n", + " shared_fontdict={'color': '.1'},\n", + " scale='linear',\n", + " writer=None,\n", + " fig=None,\n", + " bar_kwargs={'alpha': .7},\n", + " filter_column_colors=True\n", + " )\n", + "\n", + "# Generate the bar chart race for df2\n", + "make_bcr(df2)\n", + "\n", + "# Display the resulting video\n", + "Video(\"stack_overflow.mp4\", width=900, height=600, embed=True)\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}