diff --git a/openstudioreleases/scripts/Analyze_data.ipynb b/openstudioreleases/scripts/Analyze_data.ipynb new file mode 100644 index 0000000..67668f5 --- /dev/null +++ b/openstudioreleases/scripts/Analyze_data.ipynb @@ -0,0 +1,1908 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d7a6fcac", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import json\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import pycountry\n", + "import pycountry_convert" + ] + }, + { + "cell_type": "markdown", + "id": "ef4543af", + "metadata": {}, + "source": [ + "# Get Users" + ] + }, + { + "cell_type": "markdown", + "id": "7227edea", + "metadata": {}, + "source": [ + "## Via the TypeScript script" + ] + }, + { + "cell_type": "raw", + "id": "11eb6a0f", + "metadata": {}, + "source": [ + "!yarn es scripts/getUsers.ts" + ] + }, + { + "cell_type": "raw", + "id": "f6b4480a", + "metadata": {}, + "source": [ + "df_users = pd.read_json('../users.json')" + ] + }, + { + "cell_type": "markdown", + "id": "06d212e7", + "metadata": {}, + "source": [ + "## Directly from Python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4fa404d", + "metadata": {}, + "outputs": [], + "source": [ + "import dataclasses\n", + "import firebase_admin\n", + "from firebase_admin import firestore \n", + "import json\n", + "from pathlib import Path\n", + "from typing import Optional\n", + "\n", + "FIREBASE_SERVICE_ACCOUNT = json.loads(Path('../firebase-service-account.json').read_text())\n", + "\n", + "if not firebase_admin._apps:\n", + " cert = firebase_admin.credentials.Certificate(cert=Path('../firebase-service-account.json'))\n", + " app = firebase_admin.initialize_app(credential=cert, name='osc-downloads')\n", + " db = firestore.client(app=app)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f8b8bff", + "metadata": {}, + "outputs": [], + "source": [ + 
"@dataclasses.dataclass\n", + "class User:\n", + " uid: str\n", + " lastName: str\n", + " firstName: str\n", + " email: str\n", + " country: str\n", + " company: Optional[str]\n", + " occupation: Optional[str]\n", + " joinBetaTester: bool\n", + " signMailingList: bool\n", + "\n", + "assert [x.id for x in db.collections()] == ['users']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb9e26e3", + "metadata": {}, + "outputs": [], + "source": [ + "usersRef = db.collection('users')\n", + "\n", + "snapshot = usersRef.get() # Could use .stream() for a generator instead\n", + "\n", + "print(f\"There are {len(snapshot)} users in the Firestore database\")\n", + "\n", + "users = []\n", + "for doc in snapshot:\n", + " users.append(User(uid=doc.id, **doc.to_dict()))\n", + " \n", + "user_uids = [u.uid for u in users]\n", + "df_users = pd.DataFrame([dataclasses.asdict(u) for u in users])" + ] + }, + { + "cell_type": "markdown", + "id": "011be8d3", + "metadata": {}, + "source": [ + "# Look for duplicates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aeb9218a", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"There are {df_users.shape[0]} users in the Firestore database\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "994efa8d", + "metadata": {}, + "outputs": [], + "source": [ + "df_users[df_users['firstName'].duplicated(keep=False)].sort_values(by='firstName')" + ] + }, + { + "cell_type": "markdown", + "id": "245b7bcb", + "metadata": {}, + "source": [ + "# Count Users per country / continent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e273db3", + "metadata": {}, + "outputs": [], + "source": [ + "df_users[df_users['country'] == 'France']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53eabd1c", + "metadata": {}, + "outputs": [], + "source": [ + "# from src/fields/CountrySelect.tsx\n", + "country_codes = json.loads(\"\"\"\n", + "[\n", + " 
{\n", + " \"code\": \"AD\",\n", + " \"label\": \"Andorra\",\n", + " \"phone\": \"376\"\n", + " },\n", + " {\n", + " \"code\": \"AE\",\n", + " \"label\": \"United Arab Emirates\",\n", + " \"phone\": \"971\"\n", + " },\n", + " {\n", + " \"code\": \"AF\",\n", + " \"label\": \"Afghanistan\",\n", + " \"phone\": \"93\"\n", + " },\n", + " {\n", + " \"code\": \"AG\",\n", + " \"label\": \"Antigua and Barbuda\",\n", + " \"phone\": \"1-268\"\n", + " },\n", + " {\n", + " \"code\": \"AI\",\n", + " \"label\": \"Anguilla\",\n", + " \"phone\": \"1-264\"\n", + " },\n", + " {\n", + " \"code\": \"AL\",\n", + " \"label\": \"Albania\",\n", + " \"phone\": \"355\"\n", + " },\n", + " {\n", + " \"code\": \"AM\",\n", + " \"label\": \"Armenia\",\n", + " \"phone\": \"374\"\n", + " },\n", + " {\n", + " \"code\": \"AO\",\n", + " \"label\": \"Angola\",\n", + " \"phone\": \"244\"\n", + " },\n", + " {\n", + " \"code\": \"AQ\",\n", + " \"label\": \"Antarctica\",\n", + " \"phone\": \"672\"\n", + " },\n", + " {\n", + " \"code\": \"AR\",\n", + " \"label\": \"Argentina\",\n", + " \"phone\": \"54\"\n", + " },\n", + " {\n", + " \"code\": \"AS\",\n", + " \"label\": \"American Samoa\",\n", + " \"phone\": \"1-684\"\n", + " },\n", + " {\n", + " \"code\": \"AT\",\n", + " \"label\": \"Austria\",\n", + " \"phone\": \"43\"\n", + " },\n", + " {\n", + " \"code\": \"AU\",\n", + " \"label\": \"Australia\",\n", + " \"phone\": \"61\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"AW\",\n", + " \"label\": \"Aruba\",\n", + " \"phone\": \"297\"\n", + " },\n", + " {\n", + " \"code\": \"AX\",\n", + " \"label\": \"Alland Islands\",\n", + " \"phone\": \"358\"\n", + " },\n", + " {\n", + " \"code\": \"AZ\",\n", + " \"label\": \"Azerbaijan\",\n", + " \"phone\": \"994\"\n", + " },\n", + " {\n", + " \"code\": \"BA\",\n", + " \"label\": \"Bosnia and Herzegovina\",\n", + " \"phone\": \"387\"\n", + " },\n", + " {\n", + " \"code\": \"BB\",\n", + " \"label\": \"Barbados\",\n", + " \"phone\": \"1-246\"\n", + " 
},\n", + " {\n", + " \"code\": \"BD\",\n", + " \"label\": \"Bangladesh\",\n", + " \"phone\": \"880\"\n", + " },\n", + " {\n", + " \"code\": \"BE\",\n", + " \"label\": \"Belgium\",\n", + " \"phone\": \"32\"\n", + " },\n", + " {\n", + " \"code\": \"BF\",\n", + " \"label\": \"Burkina Faso\",\n", + " \"phone\": \"226\"\n", + " },\n", + " {\n", + " \"code\": \"BG\",\n", + " \"label\": \"Bulgaria\",\n", + " \"phone\": \"359\"\n", + " },\n", + " {\n", + " \"code\": \"BH\",\n", + " \"label\": \"Bahrain\",\n", + " \"phone\": \"973\"\n", + " },\n", + " {\n", + " \"code\": \"BI\",\n", + " \"label\": \"Burundi\",\n", + " \"phone\": \"257\"\n", + " },\n", + " {\n", + " \"code\": \"BJ\",\n", + " \"label\": \"Benin\",\n", + " \"phone\": \"229\"\n", + " },\n", + " {\n", + " \"code\": \"BL\",\n", + " \"label\": \"Saint Barthelemy\",\n", + " \"phone\": \"590\"\n", + " },\n", + " {\n", + " \"code\": \"BM\",\n", + " \"label\": \"Bermuda\",\n", + " \"phone\": \"1-441\"\n", + " },\n", + " {\n", + " \"code\": \"BN\",\n", + " \"label\": \"Brunei Darussalam\",\n", + " \"phone\": \"673\"\n", + " },\n", + " {\n", + " \"code\": \"BO\",\n", + " \"label\": \"Bolivia\",\n", + " \"phone\": \"591\"\n", + " },\n", + " {\n", + " \"code\": \"BR\",\n", + " \"label\": \"Brazil\",\n", + " \"phone\": \"55\"\n", + " },\n", + " {\n", + " \"code\": \"BS\",\n", + " \"label\": \"Bahamas\",\n", + " \"phone\": \"1-242\"\n", + " },\n", + " {\n", + " \"code\": \"BT\",\n", + " \"label\": \"Bhutan\",\n", + " \"phone\": \"975\"\n", + " },\n", + " {\n", + " \"code\": \"BV\",\n", + " \"label\": \"Bouvet Island\",\n", + " \"phone\": \"47\"\n", + " },\n", + " {\n", + " \"code\": \"BW\",\n", + " \"label\": \"Botswana\",\n", + " \"phone\": \"267\"\n", + " },\n", + " {\n", + " \"code\": \"BY\",\n", + " \"label\": \"Belarus\",\n", + " \"phone\": \"375\"\n", + " },\n", + " {\n", + " \"code\": \"BZ\",\n", + " \"label\": \"Belize\",\n", + " \"phone\": \"501\"\n", + " },\n", + " {\n", + " \"code\": \"CA\",\n", + " \"label\": 
\"Canada\",\n", + " \"phone\": \"1\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"CC\",\n", + " \"label\": \"Cocos (Keeling) Islands\",\n", + " \"phone\": \"61\"\n", + " },\n", + " {\n", + " \"code\": \"CD\",\n", + " \"label\": \"Congo, Democratic Republic of the\",\n", + " \"phone\": \"243\"\n", + " },\n", + " {\n", + " \"code\": \"CF\",\n", + " \"label\": \"Central African Republic\",\n", + " \"phone\": \"236\"\n", + " },\n", + " {\n", + " \"code\": \"CG\",\n", + " \"label\": \"Congo, Republic of the\",\n", + " \"phone\": \"242\"\n", + " },\n", + " {\n", + " \"code\": \"CH\",\n", + " \"label\": \"Switzerland\",\n", + " \"phone\": \"41\"\n", + " },\n", + " {\n", + " \"code\": \"CI\",\n", + " \"label\": \"Cote d'Ivoire\",\n", + " \"phone\": \"225\"\n", + " },\n", + " {\n", + " \"code\": \"CK\",\n", + " \"label\": \"Cook Islands\",\n", + " \"phone\": \"682\"\n", + " },\n", + " {\n", + " \"code\": \"CL\",\n", + " \"label\": \"Chile\",\n", + " \"phone\": \"56\"\n", + " },\n", + " {\n", + " \"code\": \"CM\",\n", + " \"label\": \"Cameroon\",\n", + " \"phone\": \"237\"\n", + " },\n", + " {\n", + " \"code\": \"CN\",\n", + " \"label\": \"China\",\n", + " \"phone\": \"86\"\n", + " },\n", + " {\n", + " \"code\": \"CO\",\n", + " \"label\": \"Colombia\",\n", + " \"phone\": \"57\"\n", + " },\n", + " {\n", + " \"code\": \"CR\",\n", + " \"label\": \"Costa Rica\",\n", + " \"phone\": \"506\"\n", + " },\n", + " {\n", + " \"code\": \"CU\",\n", + " \"label\": \"Cuba\",\n", + " \"phone\": \"53\"\n", + " },\n", + " {\n", + " \"code\": \"CV\",\n", + " \"label\": \"Cape Verde\",\n", + " \"phone\": \"238\"\n", + " },\n", + " {\n", + " \"code\": \"CW\",\n", + " \"label\": \"Curacao\",\n", + " \"phone\": \"599\"\n", + " },\n", + " {\n", + " \"code\": \"CX\",\n", + " \"label\": \"Christmas Island\",\n", + " \"phone\": \"61\"\n", + " },\n", + " {\n", + " \"code\": \"CY\",\n", + " \"label\": \"Cyprus\",\n", + " \"phone\": \"357\"\n", + " },\n", + " {\n", + " \"code\": 
\"CZ\",\n", + " \"label\": \"Czech Republic\",\n", + " \"phone\": \"420\"\n", + " },\n", + " {\n", + " \"code\": \"DE\",\n", + " \"label\": \"Germany\",\n", + " \"phone\": \"49\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"DJ\",\n", + " \"label\": \"Djibouti\",\n", + " \"phone\": \"253\"\n", + " },\n", + " {\n", + " \"code\": \"DK\",\n", + " \"label\": \"Denmark\",\n", + " \"phone\": \"45\"\n", + " },\n", + " {\n", + " \"code\": \"DM\",\n", + " \"label\": \"Dominica\",\n", + " \"phone\": \"1-767\"\n", + " },\n", + " {\n", + " \"code\": \"DO\",\n", + " \"label\": \"Dominican Republic\",\n", + " \"phone\": \"1-809\"\n", + " },\n", + " {\n", + " \"code\": \"DZ\",\n", + " \"label\": \"Algeria\",\n", + " \"phone\": \"213\"\n", + " },\n", + " {\n", + " \"code\": \"EC\",\n", + " \"label\": \"Ecuador\",\n", + " \"phone\": \"593\"\n", + " },\n", + " {\n", + " \"code\": \"EE\",\n", + " \"label\": \"Estonia\",\n", + " \"phone\": \"372\"\n", + " },\n", + " {\n", + " \"code\": \"EG\",\n", + " \"label\": \"Egypt\",\n", + " \"phone\": \"20\"\n", + " },\n", + " {\n", + " \"code\": \"EH\",\n", + " \"label\": \"Western Sahara\",\n", + " \"phone\": \"212\"\n", + " },\n", + " {\n", + " \"code\": \"ER\",\n", + " \"label\": \"Eritrea\",\n", + " \"phone\": \"291\"\n", + " },\n", + " {\n", + " \"code\": \"ES\",\n", + " \"label\": \"Spain\",\n", + " \"phone\": \"34\"\n", + " },\n", + " {\n", + " \"code\": \"ET\",\n", + " \"label\": \"Ethiopia\",\n", + " \"phone\": \"251\"\n", + " },\n", + " {\n", + " \"code\": \"FI\",\n", + " \"label\": \"Finland\",\n", + " \"phone\": \"358\"\n", + " },\n", + " {\n", + " \"code\": \"FJ\",\n", + " \"label\": \"Fiji\",\n", + " \"phone\": \"679\"\n", + " },\n", + " {\n", + " \"code\": \"FK\",\n", + " \"label\": \"Falkland Islands (Malvinas)\",\n", + " \"phone\": \"500\"\n", + " },\n", + " {\n", + " \"code\": \"FM\",\n", + " \"label\": \"Micronesia, Federated States of\",\n", + " \"phone\": \"691\"\n", + " },\n", + " {\n", + " 
\"code\": \"FO\",\n", + " \"label\": \"Faroe Islands\",\n", + " \"phone\": \"298\"\n", + " },\n", + " {\n", + " \"code\": \"FR\",\n", + " \"label\": \"France\",\n", + " \"phone\": \"33\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"GA\",\n", + " \"label\": \"Gabon\",\n", + " \"phone\": \"241\"\n", + " },\n", + " {\n", + " \"code\": \"GB\",\n", + " \"label\": \"United Kingdom\",\n", + " \"phone\": \"44\"\n", + " },\n", + " {\n", + " \"code\": \"GD\",\n", + " \"label\": \"Grenada\",\n", + " \"phone\": \"1-473\"\n", + " },\n", + " {\n", + " \"code\": \"GE\",\n", + " \"label\": \"Georgia\",\n", + " \"phone\": \"995\"\n", + " },\n", + " {\n", + " \"code\": \"GF\",\n", + " \"label\": \"French Guiana\",\n", + " \"phone\": \"594\"\n", + " },\n", + " {\n", + " \"code\": \"GG\",\n", + " \"label\": \"Guernsey\",\n", + " \"phone\": \"44\"\n", + " },\n", + " {\n", + " \"code\": \"GH\",\n", + " \"label\": \"Ghana\",\n", + " \"phone\": \"233\"\n", + " },\n", + " {\n", + " \"code\": \"GI\",\n", + " \"label\": \"Gibraltar\",\n", + " \"phone\": \"350\"\n", + " },\n", + " {\n", + " \"code\": \"GL\",\n", + " \"label\": \"Greenland\",\n", + " \"phone\": \"299\"\n", + " },\n", + " {\n", + " \"code\": \"GM\",\n", + " \"label\": \"Gambia\",\n", + " \"phone\": \"220\"\n", + " },\n", + " {\n", + " \"code\": \"GN\",\n", + " \"label\": \"Guinea\",\n", + " \"phone\": \"224\"\n", + " },\n", + " {\n", + " \"code\": \"GP\",\n", + " \"label\": \"Guadeloupe\",\n", + " \"phone\": \"590\"\n", + " },\n", + " {\n", + " \"code\": \"GQ\",\n", + " \"label\": \"Equatorial Guinea\",\n", + " \"phone\": \"240\"\n", + " },\n", + " {\n", + " \"code\": \"GR\",\n", + " \"label\": \"Greece\",\n", + " \"phone\": \"30\"\n", + " },\n", + " {\n", + " \"code\": \"GS\",\n", + " \"label\": \"South Georgia and the South Sandwich Islands\",\n", + " \"phone\": \"500\"\n", + " },\n", + " {\n", + " \"code\": \"GT\",\n", + " \"label\": \"Guatemala\",\n", + " \"phone\": \"502\"\n", + " },\n", + " {\n", 
+ " \"code\": \"GU\",\n", + " \"label\": \"Guam\",\n", + " \"phone\": \"1-671\"\n", + " },\n", + " {\n", + " \"code\": \"GW\",\n", + " \"label\": \"Guinea-Bissau\",\n", + " \"phone\": \"245\"\n", + " },\n", + " {\n", + " \"code\": \"GY\",\n", + " \"label\": \"Guyana\",\n", + " \"phone\": \"592\"\n", + " },\n", + " {\n", + " \"code\": \"HK\",\n", + " \"label\": \"Hong Kong\",\n", + " \"phone\": \"852\"\n", + " },\n", + " {\n", + " \"code\": \"HM\",\n", + " \"label\": \"Heard Island and McDonald Islands\",\n", + " \"phone\": \"672\"\n", + " },\n", + " {\n", + " \"code\": \"HN\",\n", + " \"label\": \"Honduras\",\n", + " \"phone\": \"504\"\n", + " },\n", + " {\n", + " \"code\": \"HR\",\n", + " \"label\": \"Croatia\",\n", + " \"phone\": \"385\"\n", + " },\n", + " {\n", + " \"code\": \"HT\",\n", + " \"label\": \"Haiti\",\n", + " \"phone\": \"509\"\n", + " },\n", + " {\n", + " \"code\": \"HU\",\n", + " \"label\": \"Hungary\",\n", + " \"phone\": \"36\"\n", + " },\n", + " {\n", + " \"code\": \"ID\",\n", + " \"label\": \"Indonesia\",\n", + " \"phone\": \"62\"\n", + " },\n", + " {\n", + " \"code\": \"IE\",\n", + " \"label\": \"Ireland\",\n", + " \"phone\": \"353\"\n", + " },\n", + " {\n", + " \"code\": \"IL\",\n", + " \"label\": \"Israel\",\n", + " \"phone\": \"972\"\n", + " },\n", + " {\n", + " \"code\": \"IM\",\n", + " \"label\": \"Isle of Man\",\n", + " \"phone\": \"44\"\n", + " },\n", + " {\n", + " \"code\": \"IN\",\n", + " \"label\": \"India\",\n", + " \"phone\": \"91\"\n", + " },\n", + " {\n", + " \"code\": \"IO\",\n", + " \"label\": \"British Indian Ocean Territory\",\n", + " \"phone\": \"246\"\n", + " },\n", + " {\n", + " \"code\": \"IQ\",\n", + " \"label\": \"Iraq\",\n", + " \"phone\": \"964\"\n", + " },\n", + " {\n", + " \"code\": \"IR\",\n", + " \"label\": \"Iran, Islamic Republic of\",\n", + " \"phone\": \"98\"\n", + " },\n", + " {\n", + " \"code\": \"IS\",\n", + " \"label\": \"Iceland\",\n", + " \"phone\": \"354\"\n", + " },\n", + " {\n", + " \"code\": 
\"IT\",\n", + " \"label\": \"Italy\",\n", + " \"phone\": \"39\"\n", + " },\n", + " {\n", + " \"code\": \"JE\",\n", + " \"label\": \"Jersey\",\n", + " \"phone\": \"44\"\n", + " },\n", + " {\n", + " \"code\": \"JM\",\n", + "\n", + " \"label\": \"Jamaica\",\n", + " \"phone\": \"1-876\"\n", + " },\n", + " {\n", + " \"code\": \"JO\",\n", + " \"label\": \"Jordan\",\n", + " \"phone\": \"962\"\n", + " },\n", + " {\n", + " \"code\": \"JP\",\n", + " \"label\": \"Japan\",\n", + " \"phone\": \"81\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"KE\",\n", + " \"label\": \"Kenya\",\n", + " \"phone\": \"254\"\n", + " },\n", + " {\n", + " \"code\": \"KG\",\n", + " \"label\": \"Kyrgyzstan\",\n", + " \"phone\": \"996\"\n", + " },\n", + " {\n", + " \"code\": \"KH\",\n", + " \"label\": \"Cambodia\",\n", + " \"phone\": \"855\"\n", + " },\n", + " {\n", + " \"code\": \"KI\",\n", + " \"label\": \"Kiribati\",\n", + " \"phone\": \"686\"\n", + " },\n", + " {\n", + " \"code\": \"KM\",\n", + " \"label\": \"Comoros\",\n", + " \"phone\": \"269\"\n", + " },\n", + " {\n", + " \"code\": \"KN\",\n", + " \"label\": \"Saint Kitts and Nevis\",\n", + " \"phone\": \"1-869\"\n", + " },\n", + " {\n", + " \"code\": \"KP\",\n", + " \"label\": \"Korea, Democratic People's Republic of\",\n", + " \"phone\": \"850\"\n", + " },\n", + " {\n", + " \"code\": \"KR\",\n", + " \"label\": \"Korea, Republic of\",\n", + " \"phone\": \"82\"\n", + " },\n", + " {\n", + " \"code\": \"KW\",\n", + " \"label\": \"Kuwait\",\n", + " \"phone\": \"965\"\n", + " },\n", + " {\n", + " \"code\": \"KY\",\n", + " \"label\": \"Cayman Islands\",\n", + " \"phone\": \"1-345\"\n", + " },\n", + " {\n", + " \"code\": \"KZ\",\n", + " \"label\": \"Kazakhstan\",\n", + " \"phone\": \"7\"\n", + " },\n", + " {\n", + " \"code\": \"LA\",\n", + " \"label\": \"Lao People's Democratic Republic\",\n", + " \"phone\": \"856\"\n", + " },\n", + " {\n", + " \"code\": \"LB\",\n", + " \"label\": \"Lebanon\",\n", + " \"phone\": \"961\"\n", + 
" },\n", + " {\n", + " \"code\": \"LC\",\n", + " \"label\": \"Saint Lucia\",\n", + " \"phone\": \"1-758\"\n", + " },\n", + " {\n", + " \"code\": \"LI\",\n", + " \"label\": \"Liechtenstein\",\n", + " \"phone\": \"423\"\n", + " },\n", + " {\n", + " \"code\": \"LK\",\n", + " \"label\": \"Sri Lanka\",\n", + " \"phone\": \"94\"\n", + " },\n", + " {\n", + " \"code\": \"LR\",\n", + " \"label\": \"Liberia\",\n", + " \"phone\": \"231\"\n", + " },\n", + " {\n", + " \"code\": \"LS\",\n", + " \"label\": \"Lesotho\",\n", + " \"phone\": \"266\"\n", + " },\n", + " {\n", + " \"code\": \"LT\",\n", + " \"label\": \"Lithuania\",\n", + " \"phone\": \"370\"\n", + " },\n", + " {\n", + " \"code\": \"LU\",\n", + " \"label\": \"Luxembourg\",\n", + " \"phone\": \"352\"\n", + " },\n", + " {\n", + " \"code\": \"LV\",\n", + " \"label\": \"Latvia\",\n", + " \"phone\": \"371\"\n", + " },\n", + " {\n", + " \"code\": \"LY\",\n", + " \"label\": \"Libya\",\n", + " \"phone\": \"218\"\n", + " },\n", + " {\n", + " \"code\": \"MA\",\n", + " \"label\": \"Morocco\",\n", + " \"phone\": \"212\"\n", + " },\n", + " {\n", + " \"code\": \"MC\",\n", + " \"label\": \"Monaco\",\n", + " \"phone\": \"377\"\n", + " },\n", + " {\n", + " \"code\": \"MD\",\n", + " \"label\": \"Moldova, Republic of\",\n", + " \"phone\": \"373\"\n", + " },\n", + " {\n", + " \"code\": \"ME\",\n", + " \"label\": \"Montenegro\",\n", + " \"phone\": \"382\"\n", + " },\n", + " {\n", + " \"code\": \"MF\",\n", + " \"label\": \"Saint Martin (French part)\",\n", + " \"phone\": \"590\"\n", + " },\n", + " {\n", + " \"code\": \"MG\",\n", + " \"label\": \"Madagascar\",\n", + " \"phone\": \"261\"\n", + " },\n", + " {\n", + " \"code\": \"MH\",\n", + " \"label\": \"Marshall Islands\",\n", + " \"phone\": \"692\"\n", + " },\n", + " {\n", + " \"code\": \"MK\",\n", + " \"label\": \"Macedonia, the Former Yugoslav Republic of\",\n", + " \"phone\": \"389\"\n", + " },\n", + " {\n", + " \"code\": \"ML\",\n", + " \"label\": \"Mali\",\n", + " \"phone\": \"223\"\n", 
+ " },\n", + " {\n", + " \"code\": \"MM\",\n", + " \"label\": \"Myanmar\",\n", + " \"phone\": \"95\"\n", + " },\n", + " {\n", + " \"code\": \"MN\",\n", + " \"label\": \"Mongolia\",\n", + " \"phone\": \"976\"\n", + " },\n", + " {\n", + " \"code\": \"MO\",\n", + " \"label\": \"Macao\",\n", + " \"phone\": \"853\"\n", + " },\n", + " {\n", + " \"code\": \"MP\",\n", + " \"label\": \"Northern Mariana Islands\",\n", + " \"phone\": \"1-670\"\n", + " },\n", + " {\n", + " \"code\": \"MQ\",\n", + " \"label\": \"Martinique\",\n", + " \"phone\": \"596\"\n", + " },\n", + " {\n", + " \"code\": \"MR\",\n", + " \"label\": \"Mauritania\",\n", + " \"phone\": \"222\"\n", + " },\n", + " {\n", + " \"code\": \"MS\",\n", + " \"label\": \"Montserrat\",\n", + " \"phone\": \"1-664\"\n", + " },\n", + " {\n", + " \"code\": \"MT\",\n", + " \"label\": \"Malta\",\n", + " \"phone\": \"356\"\n", + " },\n", + " {\n", + " \"code\": \"MU\",\n", + " \"label\": \"Mauritius\",\n", + " \"phone\": \"230\"\n", + " },\n", + " {\n", + " \"code\": \"MV\",\n", + " \"label\": \"Maldives\",\n", + " \"phone\": \"960\"\n", + " },\n", + " {\n", + " \"code\": \"MW\",\n", + " \"label\": \"Malawi\",\n", + " \"phone\": \"265\"\n", + " },\n", + " {\n", + " \"code\": \"MX\",\n", + " \"label\": \"Mexico\",\n", + " \"phone\": \"52\"\n", + " },\n", + " {\n", + " \"code\": \"MY\",\n", + " \"label\": \"Malaysia\",\n", + " \"phone\": \"60\"\n", + " },\n", + " {\n", + " \"code\": \"MZ\",\n", + " \"label\": \"Mozambique\",\n", + " \"phone\": \"258\"\n", + " },\n", + " {\n", + " \"code\": \"NA\",\n", + " \"label\": \"Namibia\",\n", + " \"phone\": \"264\"\n", + " },\n", + " {\n", + " \"code\": \"NC\",\n", + " \"label\": \"New Caledonia\",\n", + " \"phone\": \"687\"\n", + " },\n", + " {\n", + " \"code\": \"NE\",\n", + " \"label\": \"Niger\",\n", + " \"phone\": \"227\"\n", + " },\n", + " {\n", + " \"code\": \"NF\",\n", + " \"label\": \"Norfolk Island\",\n", + " \"phone\": \"672\"\n", + " },\n", + " {\n", + " \"code\": \"NG\",\n", + " 
\"label\": \"Nigeria\",\n", + " \"phone\": \"234\"\n", + " },\n", + " {\n", + " \"code\": \"NI\",\n", + " \"label\": \"Nicaragua\",\n", + " \"phone\": \"505\"\n", + " },\n", + " {\n", + " \"code\": \"NL\",\n", + " \"label\": \"Netherlands\",\n", + " \"phone\": \"31\"\n", + " },\n", + " {\n", + " \"code\": \"NO\",\n", + " \"label\": \"Norway\",\n", + " \"phone\": \"47\"\n", + " },\n", + " {\n", + " \"code\": \"NP\",\n", + " \"label\": \"Nepal\",\n", + " \"phone\": \"977\"\n", + " },\n", + " {\n", + " \"code\": \"NR\",\n", + " \"label\": \"Nauru\",\n", + " \"phone\": \"674\"\n", + " },\n", + " {\n", + " \"code\": \"NU\",\n", + " \"label\": \"Niue\",\n", + " \"phone\": \"683\"\n", + " },\n", + " {\n", + " \"code\": \"NZ\",\n", + " \"label\": \"New Zealand\",\n", + " \"phone\": \"64\"\n", + " },\n", + " {\n", + " \"code\": \"OM\",\n", + " \"label\": \"Oman\",\n", + " \"phone\": \"968\"\n", + " },\n", + " {\n", + " \"code\": \"PA\",\n", + " \"label\": \"Panama\",\n", + " \"phone\": \"507\"\n", + " },\n", + " {\n", + " \"code\": \"PE\",\n", + " \"label\": \"Peru\",\n", + " \"phone\": \"51\"\n", + " },\n", + " {\n", + " \"code\": \"PF\",\n", + " \"label\": \"French Polynesia\",\n", + " \"phone\": \"689\"\n", + " },\n", + " {\n", + " \"code\": \"PG\",\n", + " \"label\": \"Papua New Guinea\",\n", + " \"phone\": \"675\"\n", + " },\n", + " {\n", + " \"code\": \"PH\",\n", + " \"label\": \"Philippines\",\n", + " \"phone\": \"63\"\n", + " },\n", + " {\n", + " \"code\": \"PK\",\n", + " \"label\": \"Pakistan\",\n", + " \"phone\": \"92\"\n", + " },\n", + " {\n", + " \"code\": \"PL\",\n", + " \"label\": \"Poland\",\n", + " \"phone\": \"48\"\n", + " },\n", + " {\n", + " \"code\": \"PM\",\n", + " \"label\": \"Saint Pierre and Miquelon\",\n", + " \"phone\": \"508\"\n", + " },\n", + " {\n", + " \"code\": \"PN\",\n", + " \"label\": \"Pitcairn\",\n", + " \"phone\": \"870\"\n", + " },\n", + " {\n", + " \"code\": \"PR\",\n", + " \"label\": \"Puerto Rico\",\n", + " \"phone\": \"1\"\n", + " 
},\n", + " {\n", + " \"code\": \"PS\",\n", + " \"label\": \"Palestine, State of\",\n", + " \"phone\": \"970\"\n", + " },\n", + " {\n", + " \"code\": \"PT\",\n", + " \"label\": \"Portugal\",\n", + " \"phone\": \"351\"\n", + " },\n", + " {\n", + " \"code\": \"PW\",\n", + " \"label\": \"Palau\",\n", + " \"phone\": \"680\"\n", + " },\n", + " {\n", + " \"code\": \"PY\",\n", + " \"label\": \"Paraguay\",\n", + " \"phone\": \"595\"\n", + " },\n", + " {\n", + " \"code\": \"QA\",\n", + " \"label\": \"Qatar\",\n", + " \"phone\": \"974\"\n", + " },\n", + " {\n", + " \"code\": \"RE\",\n", + " \"label\": \"Reunion\",\n", + " \"phone\": \"262\"\n", + " },\n", + " {\n", + " \"code\": \"RO\",\n", + " \"label\": \"Romania\",\n", + " \"phone\": \"40\"\n", + " },\n", + " {\n", + " \"code\": \"RS\",\n", + " \"label\": \"Serbia\",\n", + " \"phone\": \"381\"\n", + " },\n", + " {\n", + " \"code\": \"RU\",\n", + " \"label\": \"Russian Federation\",\n", + " \"phone\": \"7\"\n", + " },\n", + " {\n", + " \"code\": \"RW\",\n", + " \"label\": \"Rwanda\",\n", + " \"phone\": \"250\"\n", + " },\n", + " {\n", + " \"code\": \"SA\",\n", + " \"label\": \"Saudi Arabia\",\n", + " \"phone\": \"966\"\n", + " },\n", + " {\n", + " \"code\": \"SB\",\n", + " \"label\": \"Solomon Islands\",\n", + " \"phone\": \"677\"\n", + " },\n", + " {\n", + " \"code\": \"SC\",\n", + " \"label\": \"Seychelles\",\n", + " \"phone\": \"248\"\n", + " },\n", + " {\n", + " \"code\": \"SD\",\n", + " \"label\": \"Sudan\",\n", + " \"phone\": \"249\"\n", + " },\n", + " {\n", + " \"code\": \"SE\",\n", + " \"label\": \"Sweden\",\n", + " \"phone\": \"46\"\n", + " },\n", + " {\n", + " \"code\": \"SG\",\n", + " \"label\": \"Singapore\",\n", + " \"phone\": \"65\"\n", + " },\n", + " {\n", + " \"code\": \"SH\",\n", + " \"label\": \"Saint Helena\",\n", + " \"phone\": \"290\"\n", + " },\n", + " {\n", + " \"code\": \"SI\",\n", + " \"label\": \"Slovenia\",\n", + " \"phone\": \"386\"\n", + " },\n", + " {\n", + " \"code\": \"SJ\",\n", + " 
\"label\": \"Svalbard and Jan Mayen\",\n", + " \"phone\": \"47\"\n", + " },\n", + " {\n", + " \"code\": \"SK\",\n", + " \"label\": \"Slovakia\",\n", + " \"phone\": \"421\"\n", + " },\n", + " {\n", + " \"code\": \"SL\",\n", + " \"label\": \"Sierra Leone\",\n", + " \"phone\": \"232\"\n", + " },\n", + " {\n", + " \"code\": \"SM\",\n", + " \"label\": \"San Marino\",\n", + " \"phone\": \"378\"\n", + " },\n", + " {\n", + " \"code\": \"SN\",\n", + " \"label\": \"Senegal\",\n", + " \"phone\": \"221\"\n", + " },\n", + " {\n", + " \"code\": \"SO\",\n", + " \"label\": \"Somalia\",\n", + " \"phone\": \"252\"\n", + " },\n", + " {\n", + " \"code\": \"SR\",\n", + " \"label\": \"Suriname\",\n", + " \"phone\": \"597\"\n", + " },\n", + " {\n", + " \"code\": \"SS\",\n", + " \"label\": \"South Sudan\",\n", + " \"phone\": \"211\"\n", + " },\n", + " {\n", + " \"code\": \"ST\",\n", + " \"label\": \"Sao Tome and Principe\",\n", + " \"phone\": \"239\"\n", + " },\n", + " {\n", + " \"code\": \"SV\",\n", + " \"label\": \"El Salvador\",\n", + " \"phone\": \"503\"\n", + " },\n", + " {\n", + " \"code\": \"SX\",\n", + " \"label\": \"Sint Maarten (Dutch part)\",\n", + " \"phone\": \"1-721\"\n", + " },\n", + " {\n", + " \"code\": \"SY\",\n", + " \"label\": \"Syrian Arab Republic\",\n", + " \"phone\": \"963\"\n", + " },\n", + " {\n", + " \"code\": \"SZ\",\n", + " \"label\": \"Swaziland\",\n", + " \"phone\": \"268\"\n", + " },\n", + " {\n", + " \"code\": \"TC\",\n", + " \"label\": \"Turks and Caicos Islands\",\n", + " \"phone\": \"1-649\"\n", + " },\n", + " {\n", + " \"code\": \"TD\",\n", + " \"label\": \"Chad\",\n", + " \"phone\": \"235\"\n", + " },\n", + " {\n", + " \"code\": \"TF\",\n", + " \"label\": \"French Southern Territories\",\n", + " \"phone\": \"262\"\n", + " },\n", + " {\n", + " \"code\": \"TG\",\n", + " \"label\": \"Togo\",\n", + " \"phone\": \"228\"\n", + " },\n", + " {\n", + " \"code\": \"TH\",\n", + " \"label\": \"Thailand\",\n", + " \"phone\": \"66\"\n", + " },\n", + " {\n", + " 
\"code\": \"TJ\",\n", + " \"label\": \"Tajikistan\",\n", + " \"phone\": \"992\"\n", + " },\n", + " {\n", + " \"code\": \"TK\",\n", + " \"label\": \"Tokelau\",\n", + " \"phone\": \"690\"\n", + " },\n", + " {\n", + " \"code\": \"TL\",\n", + " \"label\": \"Timor-Leste\",\n", + " \"phone\": \"670\"\n", + " },\n", + " {\n", + " \"code\": \"TM\",\n", + " \"label\": \"Turkmenistan\",\n", + " \"phone\": \"993\"\n", + " },\n", + " {\n", + " \"code\": \"TN\",\n", + "\n", + " \"label\": \"Tunisia\",\n", + " \"phone\": \"216\"\n", + " },\n", + " {\n", + " \"code\": \"TO\",\n", + " \"label\": \"Tonga\",\n", + " \"phone\": \"676\"\n", + " },\n", + " {\n", + " \"code\": \"TR\",\n", + " \"label\": \"Turkey\",\n", + " \"phone\": \"90\"\n", + " },\n", + " {\n", + " \"code\": \"TT\",\n", + " \"label\": \"Trinidad and Tobago\",\n", + " \"phone\": \"1-868\"\n", + " },\n", + " {\n", + " \"code\": \"TV\",\n", + " \"label\": \"Tuvalu\",\n", + " \"phone\": \"688\"\n", + " },\n", + " {\n", + " \"code\": \"TW\",\n", + " \"label\": \"Taiwan\",\n", + " \"phone\": \"886\"\n", + " },\n", + " {\n", + " \"code\": \"TZ\",\n", + " \"label\": \"United Republic of Tanzania\",\n", + " \"phone\": \"255\"\n", + " },\n", + " {\n", + " \"code\": \"UA\",\n", + " \"label\": \"Ukraine\",\n", + " \"phone\": \"380\"\n", + " },\n", + " {\n", + " \"code\": \"UG\",\n", + " \"label\": \"Uganda\",\n", + " \"phone\": \"256\"\n", + " },\n", + " {\n", + " \"code\": \"US\",\n", + " \"label\": \"United States\",\n", + " \"phone\": \"1\",\n", + " \"suggested\": true\n", + " },\n", + " {\n", + " \"code\": \"UY\",\n", + " \"label\": \"Uruguay\",\n", + " \"phone\": \"598\"\n", + " },\n", + " {\n", + " \"code\": \"UZ\",\n", + " \"label\": \"Uzbekistan\",\n", + " \"phone\": \"998\"\n", + " },\n", + " {\n", + " \"code\": \"VA\",\n", + " \"label\": \"Holy See (Vatican City State)\",\n", + " \"phone\": \"379\"\n", + " },\n", + " {\n", + " \"code\": \"VC\",\n", + " \"label\": \"Saint Vincent and the Grenadines\",\n", + " 
\"phone\": \"1-784\"\n", + " },\n", + " {\n", + " \"code\": \"VE\",\n", + " \"label\": \"Venezuela\",\n", + " \"phone\": \"58\"\n", + " },\n", + " {\n", + " \"code\": \"VG\",\n", + " \"label\": \"British Virgin Islands\",\n", + " \"phone\": \"1-284\"\n", + " },\n", + " {\n", + " \"code\": \"VI\",\n", + " \"label\": \"US Virgin Islands\",\n", + " \"phone\": \"1-340\"\n", + " },\n", + " {\n", + " \"code\": \"VN\",\n", + " \"label\": \"Vietnam\",\n", + " \"phone\": \"84\"\n", + " },\n", + " {\n", + " \"code\": \"VU\",\n", + " \"label\": \"Vanuatu\",\n", + " \"phone\": \"678\"\n", + " },\n", + " {\n", + " \"code\": \"WF\",\n", + " \"label\": \"Wallis and Futuna\",\n", + " \"phone\": \"681\"\n", + " },\n", + " {\n", + " \"code\": \"WS\",\n", + " \"label\": \"Samoa\",\n", + " \"phone\": \"685\"\n", + " },\n", + " {\n", + " \"code\": \"XK\",\n", + " \"label\": \"Kosovo\",\n", + " \"phone\": \"383\"\n", + " },\n", + " {\n", + " \"code\": \"YE\",\n", + " \"label\": \"Yemen\",\n", + " \"phone\": \"967\"\n", + " },\n", + " {\n", + " \"code\": \"YT\",\n", + " \"label\": \"Mayotte\",\n", + " \"phone\": \"262\"\n", + " },\n", + " {\n", + " \"code\": \"ZA\",\n", + " \"label\": \"South Africa\",\n", + " \"phone\": \"27\"\n", + " },\n", + " {\n", + " \"code\": \"ZM\",\n", + " \"label\": \"Zambia\",\n", + " \"phone\": \"260\"\n", + " },\n", + " {\n", + " \"code\": \"ZW\",\n", + " \"label\": \"Zimbabwe\",\n", + " \"phone\": \"263\"\n", + " }\n", + "]\n", + "\"\"\")\n", + "\n", + "countries = sorted([c['label'] for c in country_codes])" + ] + }, + { + "cell_type": "markdown", + "id": "5ea2b0d9", + "metadata": {}, + "source": [ + "## Look for wrong countries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1debd59c", + "metadata": {}, + "outputs": [], + "source": [ + "df_users['country'].isin(countries).value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77809556", + "metadata": {}, + "outputs": [], + "source": [ + "if 
df_users[~df_users['country'].isin(countries)].empty:\n", + " print(\"yes\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "850a6996", + "metadata": {}, + "outputs": [], + "source": [ + "if not df_users[~df_users['country'].isin(countries)].empty:\n", + " from fuzzywuzzy import process\n", + " need_remaps = list(df_users[~df_users['country'].isin(countries)]['country'].unique())\n", + " remapping = {}\n", + " for need_remap in need_remaps:\n", + " found, score = process.extract(need_remap, countries, limit=1)[0]\n", + " remapping[need_remap] = {'remapped': found, 'score': score}\n", + " pd.DataFrame(remapping).T.to_csv('Remap_countries.csv')\n", + " pd.Series(countries).to_csv('countries.csv')\n", + " # Reload\n", + " # s_remaped = pd.read_clipboard().set_index('Original')['remapped']\n", + " # s_remaped.to_json('scripts/remap_countries.json', indent=2)" + ] + }, + { + "cell_type": "markdown", + "id": "16ec822d", + "metadata": {}, + "source": [ + "## Map to alpha2 / continent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de0bc941", + "metadata": {}, + "outputs": [], + "source": [ + "#df['country_alpha2']\n", + "def get_alpha2(country_name):\n", + " return next(c for c in country_codes if c['label'] == country_name)['code']\n", + "\n", + "df_users['country_alpha2'] = df_users['country'].map(get_alpha2)\n", + "\n", + "CONTINENT_CODE_TO_NAME = {\n", + " 'NA': 'North America',\n", + " 'SA': 'South America', \n", + " 'AS': 'Asia',\n", + " 'OC': 'Australia',\n", + " 'AF': 'Africa',\n", + " 'EU': 'Europe'\n", + "}\n", + "\n", + "def alpha2_to_continent_name(alpha2):\n", + " return CONTINENT_CODE_TO_NAME[pycountry_convert.country_alpha2_to_continent_code(alpha2)]\n", + " \n", + "df_users['continent'] = df_users['country_alpha2'].map(alpha2_to_continent_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0f47ede", + "metadata": {}, + "outputs": [], + "source": [ + 
"df_users.groupby('continent').size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8110d004", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "df_users.groupby('country').size().sort_values(ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "f31c7e2d", + "metadata": {}, + "source": [ + "# Plot Users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c32952eb", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45658ff5", + "metadata": {}, + "outputs": [], + "source": [ + "# https://www.naturalearthdata.com/downloads/110m-cultural-vectors/\n", + "# Admin 0 - countries\n", + "# naturalearth_lowres\n", + "# world = gpd.read_file('/home/julien/Downloads/ne_110m_admin_0_countries.zip')\n", + "world = gpd.read_file('https://raw.githubusercontent.com/nvkelso/natural-earth-vector/refs/heads/master/geojson/ne_110m_admin_0_countries.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f8a8966", + "metadata": {}, + "outputs": [], + "source": [ + "s_count_per_country = df_users.groupby('country_alpha2').size().sort_values(ascending=False)\n", + "s_count_per_country.name = 'Nb Users'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8b04f3", + "metadata": {}, + "outputs": [], + "source": [ + "location = pd.read_csv(\n", + " 'https://raw.githubusercontent.com/google/dspl/master/samples/google/canonical/countries.csv'\n", + ").set_index('country')[['latitude', 'longitude']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae82ec49", + "metadata": {}, + "outputs": [], + "source": [ + "world = world.merge(\n", + " right=location,\n", + " how='left',\n", + " left_on='ISO_A2_EH', # ISO_A2 has some -99 in there...\n", + " right_index=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"5f05de40", + "metadata": {}, + "outputs": [], + "source": [ + "toplot = world.merge(\n", + " right=s_count_per_country,\n", + " how='left',\n", + " left_on='ISO_A2_EH',\n", + " right_index=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0aae2d69", + "metadata": {}, + "outputs": [], + "source": [ + "per_continent = toplot.groupby('CONTINENT')['Nb Users'].sum()\n", + "toplot['Nb Users Per Continent'] = toplot['CONTINENT'].map(lambda c: per_continent.loc[c])" + ] + }, + { + "cell_type": "markdown", + "id": "36818d52", + "metadata": {}, + "source": [ + "## Per Country" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "435a05a3", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(16, 9))\n", + "\n", + "world.plot(\n", + " ax=ax,\n", + " color=\"lightgray\",\n", + " edgecolor=\"black\",\n", + " alpha=0.5\n", + ")\n", + "\n", + "\n", + "toplot.plot(ax=ax, column='Nb Users', #scheme=\"quantiles\",\n", + " figsize=(16, 9),\n", + " legend=True, cmap='viridis')\n", + "toplot.apply(lambda x:\n", + " ax.annotate(\n", + " text=f\"{x['Nb Users']:.0f}\",\n", + " #xy=x.loc['geometry'].centroid.coords[0],\n", + " xy=(x.loc['longitude'], x.loc['latitude']),\n", + " c='red', ha='center', va='center',\n", + " ) \n", + " if not pd.isna(x['Nb Users']) else None,\n", + " axis=1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "13645b8f", + "metadata": {}, + "source": [ + "### As bar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49c535d4", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(16, 36))\n", + "\n", + "(\n", + " toplot[['FORMAL_EN', 'Nb Users']]\n", + " .dropna()\n", + " .sort_values(by='Nb Users', ascending=True)\n", + " .set_index('FORMAL_EN')\n", + " .plot(kind='barh', ax=ax)\n", + ")\n", + "for p in ax.patches:\n", + " ax.annotate(f\"{p.get_width():.0f}\", (p.get_width() * 1.005, p.get_y() + 
p.get_height() / 2.0))\n", + " \n", + "ax.set_title('Users per country')" + ] + }, + { + "cell_type": "markdown", + "id": "34386326", + "metadata": {}, + "source": [ + "## Per continent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f97c991e", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "ax = toplot.plot(column='Nb Users Per Continent', #scheme=\"quantiles\",\n", + " figsize=(16, 9),\n", + " legend=True,cmap='Reds')" + ] + }, + { + "cell_type": "markdown", + "id": "77db1ce0", + "metadata": {}, + "source": [ + "### As a bar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfbd1942", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(16,9 ))\n", + "\n", + "(\n", + " df_users.groupby('continent')['uid'].count()\n", + " .sort_values(ascending=True)\n", + " .plot(kind='barh', ax=ax)\n", + ")\n", + "for p in ax.patches:\n", + " ax.annotate(f\"{p.get_width():.0f}\", (p.get_width() * 1.005, p.get_y() + p.get_height() / 2.0))\n", + "ax.set_title('Users per continent')" + ] + }, + { + "cell_type": "markdown", + "id": "d5e91578", + "metadata": {}, + "source": [ + "# Other" + ] + }, + { + "cell_type": "markdown", + "id": "e84881ca", + "metadata": {}, + "source": [ + "## Grab via query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00f0307b", + "metadata": {}, + "outputs": [], + "source": [ + "from google.cloud.firestore_v1.base_query import FieldFilter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d6cc6d", + "metadata": {}, + "outputs": [], + "source": [ + "usersRef.where(field_path='firstName', op_string='==', value='Julien')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d369b86", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: Use of CollectionRef stream() is preferred to get()\n", + "docs = (\n", + " usersRef\n", + " # .where(field_path='firstName', op_string='==', 
value='Julien')\n", + " .where(filter=FieldFilter(field_path='firstName', op_string='==', value='Julien'))\n", + " .stream()\n", + ")\n", + "\n", + "for doc in docs:\n", + " print(f\"{doc.id} => {doc.to_dict()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e860e76", + "metadata": {}, + "outputs": [], + "source": [ + "from google.cloud.firestore_v1.field_path import FieldPath\n", + "FieldPath.document_id()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a5ba782", + "metadata": {}, + "outputs": [], + "source": [ + "from google.cloud.firestore_v1.types import StructuredQuery\n", + "[print(x.name) for x in StructuredQuery.FieldFilter.Operator]" + ] + }, + { + "cell_type": "markdown", + "id": "6481029d", + "metadata": {}, + "source": [ + "## Ensure Auth Users match the Users in the Firestore database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "024f9a57", + "metadata": {}, + "outputs": [], + "source": [ + "from firebase_admin import auth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e563075", + "metadata": {}, + "outputs": [], + "source": [ + "auth_users = list(auth.list_users(app=app).iterate_all())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daf9d144", + "metadata": {}, + "outputs": [], + "source": [ + "extra_auth_users = [auth_user for auth_user in auth_users if auth_user.uid not in user_uids]\n", + "print(f\"There are {len(extra_auth_users)} Extra Auth users compared to Firestore database\")\n", + "if extra_auth_users:\n", + " for extra_auth_user in extra_auth_users:\n", + " print(extra_auth_user.uid, extra_auth_user.email)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aca36420", + "metadata": {}, + "outputs": [], + "source": [ + "assert len(auth_users) == len(users)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + 
}, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}