reorganize notebooks for better sharing

matteo-stat · Aug 28, 2024 · 86b60df · 86b60df
1 parent f99080f
commit 86b60df
Show file tree

Hide file tree

Showing 7 changed files with 948 additions and 835 deletions.
diff --git a/01-ssd-framework-single-shot-detector-for-object-detection.ipynb b/01-ssd-framework-single-shot-detector-for-object-detection.ipynb
diff --git a/02-data-encoding-and-decoding.ipynb b/02-data-encoding-and-decoding.ipynb
diff --git a/ssd-segmentation-training.ipynb → 03-ssd-segmentation-training.ipynb b/ssd-segmentation-training.ipynb → 03-ssd-segmentation-training.ipynb
diff --git a/explore-dataset.ipynb → 99-check-dataset-class-imbalance.ipynb b/explore-dataset.ipynb → 99-check-dataset-class-imbalance.ipynb
@@ -4,15 +4,23 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# global variables"
+    "## Dependencies and Parameters\n",
+    "\n",
+    "Let's quickly import dependencies and define some useful parameters for this notebook."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
+    "# dependencies\n",
+    "import csv\n",
+    "import json\n",
+    "import numpy as np\n",
+    "import random\n",
+    "\n",
     "# data options\n",
     "LABELS_CODES = [0, 1, 2, 3]\n",
     "LABEL_CODE_BACKGROUND = 0\n",
@@ -38,31 +46,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# dependecies"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import csv\n",
-    "import json\n",
-    "import numpy as np\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# load metadata"
+    "## Read Metadata\n",
+    "\n",
+    "Read the metadata (file paths for the images and for the box coordinates)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -101,67 +92,22 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# object detection"
+    "## Check Class Imbalance"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "# which data should be evaluated?\n",
     "PATH_FILES_LABELS_BOXES = path_files_labels_boxes_train"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## samples, images and boxes aspect ratios for each class"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# for each class initialize counters for samples (images), instances (objects) and boxes aspect ratios (width / height)\n",
-    "# storing samples indexes per class and then counting the number of unique indexes it's a simple way to count samples per class\n",
-    "samples_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
-    "instances_per_class = {label: 0 for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
-    "boxes_aspect_ratios_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
-    "\n",
-    "# for each file count number of samples per class and images per class\n",
-    "for i, path_file_labels_boxes in enumerate(PATH_FILES_LABELS_BOXES):\n",
-    "\n",
-    "    # read ground truth labels and boxes\n",
-    "    with open(path_file_labels_boxes, 'r') as f:\n",
-    "        for label, xmin, ymin, xmax, ymax in csv.reader(f):\n",
-    "\n",
-    "            # format ground truth data\n",
-    "            label = int(label)\n",
-    "            width = float(xmax) - float(xmin) + 1.0\n",
-    "            height = float(ymax) - float(ymin) + 1.0            \n",
-    "\n",
-    "            # add indexes for count samples later on\n",
-    "            samples_per_class[label].append(i)\n",
-    "\n",
-    "            # increment instances counter\n",
-    "            instances_per_class[label] += 1\n",
-    "\n",
-    "            # add aspect ratio to the list\n",
-    "            boxes_aspect_ratios_per_class[label].append(width / height)\n",
-    "\n",
-    "\n",
-    "# calculate the number of samples per class\n",
-    "samples_per_class = {label: len(set(indexes)) for label, indexes in samples_per_class.items()}"
-   ]
-  },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -180,7 +126,7 @@
       "***    instances     ***\n",
       "************************\n",
       "> monorail: 1,861 - 34%\n",
-      ">   person: 2,097 - 38%\n",
+      ">   person: 2,071 - 38%\n",
       "> forklift: 1,535 - 28%\n",
       "\n",
       "************************\n",
@@ -198,14 +144,14 @@
       "   - p80: 3.376\n",
       "   - p90: 5.129\n",
       "> person\n",
-      "   - p10: 0.315\n",
-      "   - p20: 0.385\n",
-      "   - p30: 0.467\n",
-      "   - p40: 0.557\n",
+      "   - p10: 0.318\n",
+      "   - p20: 0.388\n",
+      "   - p30: 0.471\n",
+      "   - p40: 0.559\n",
       "   - p50: 0.662\n",
-      "   - p60: 0.781\n",
-      "   - p70: 0.955\n",
-      "   - p80: 1.277\n",
+      "   - p60: 0.788\n",
+      "   - p70: 0.972\n",
+      "   - p80: 1.325\n",
       "   - p90: 2.571\n",
       "> forklift\n",
       "   - p10: 0.461\n",
@@ -221,6 +167,37 @@
     }
    ],
    "source": [
+    "# for each class initialize counters for samples (images), instances (objects) and boxes aspect ratios (width / height)\n",
+    "# storing sample indexes per class and then counting the number of unique indexes is a simple way to count samples per class\n",
+    "samples_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
+    "instances_per_class = {label: 0 for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
+    "boxes_aspect_ratios_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
+    "\n",
+    "# for each file collect sample indexes, instance counts and box aspect ratios per class\n",
+    "for i, path_file_labels_boxes in enumerate(PATH_FILES_LABELS_BOXES):\n",
+    "\n",
+    "    # read ground truth labels and boxes\n",
+    "    with open(path_file_labels_boxes, 'r') as f:\n",
+    "        for label, xmin, ymin, xmax, ymax in csv.reader(f):\n",
+    "\n",
+    "            # format ground truth data\n",
+    "            label = int(label)\n",
+    "            width = float(xmax) - float(xmin) + 1.0\n",
+    "            height = float(ymax) - float(ymin) + 1.0            \n",
+    "\n",
+    "            # add indexes for count samples later on\n",
+    "            samples_per_class[label].append(i)\n",
+    "\n",
+    "            # increment instances counter\n",
+    "            instances_per_class[label] += 1\n",
+    "\n",
+    "            # add aspect ratio to the list\n",
+    "            boxes_aspect_ratios_per_class[label].append(width / height)\n",
+    "\n",
+    "\n",
+    "# calculate the number of samples per class\n",
+    "samples_per_class = {label: len(set(indexes)) for label, indexes in samples_per_class.items()}\n",
+    "\n",
     "# print samples\n",
     "total_samples = sum(samples_per_class.values())\n",
     "print('\\n************************')\n",
@@ -274,7 +251,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.9"
   },
   "orig_nbformat": 4
  },

diff --git a/reorganize-original-data.py → 99-reorganize-original-data.py b/reorganize-original-data.py → 99-reorganize-original-data.py