Skip to content

Commit

Permalink
reorganize notebooks for better sharing
Browse files Browse the repository at this point in the history
  • Loading branch information
matteo-stat committed Aug 28, 2024
1 parent f99080f commit 86b60df
Show file tree
Hide file tree
Showing 7 changed files with 948 additions and 835 deletions.
436 changes: 436 additions & 0 deletions 01-ssd-framework-single-shot-detector-for-object-detection.ipynb

Large diffs are not rendered by default.

455 changes: 455 additions & 0 deletions 02-data-encoding-and-decoding.ipynb

Large diffs are not rendered by default.

File renamed without changes.
137 changes: 57 additions & 80 deletions explore-dataset.ipynb → 99-check-dataset-class-imbalance.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# global variables"
"## Dependecies and Parameters\n",
"\n",
"Let's quickly import dependecies and define some useful parameters for this notebook."
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# dependecies\n",
"import csv\n",
"import json\n",
"import numpy as np\n",
"import random\n",
"\n",
"# data options\n",
"LABELS_CODES = [0, 1, 2, 3]\n",
"LABEL_CODE_BACKGROUND = 0\n",
Expand All @@ -38,31 +46,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# dependecies"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"import json\n",
"import numpy as np\n",
"import random"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# load metadata"
"## Read Metadata\n",
"\n",
"Read metadata (files paths locations for images and boxes coordinates)."
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -101,67 +92,22 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# object detection"
"## Check Class Imbalance"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# which data should be evaluated?\n",
"PATH_FILES_LABELS_BOXES = path_files_labels_boxes_train"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## samples, images and boxes aspect ratios for each class"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# for each class initialize counters for samples (images), instances (objects) and boxes aspect ratios (width / height)\n",
"# storing samples indexes per class and then counting the number of unique indexes it's a simple way to count samples per class\n",
"samples_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"instances_per_class = {label: 0 for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"boxes_aspect_ratios_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"\n",
"# for each file count number of samples per class and images per class\n",
"for i, path_file_labels_boxes in enumerate(PATH_FILES_LABELS_BOXES):\n",
"\n",
" # read ground truth labels and boxes\n",
" with open(path_file_labels_boxes, 'r') as f:\n",
" for label, xmin, ymin, xmax, ymax in csv.reader(f):\n",
"\n",
" # format ground truth data\n",
" label = int(label)\n",
" width = float(xmax) - float(xmin) + 1.0\n",
" height = float(ymax) - float(ymin) + 1.0 \n",
"\n",
" # add indexes for count samples later on\n",
" samples_per_class[label].append(i)\n",
"\n",
" # increment instances counter\n",
" instances_per_class[label] += 1\n",
"\n",
" # add aspect ratio to the list\n",
" boxes_aspect_ratios_per_class[label].append(width / height)\n",
"\n",
"\n",
"# calculate the number of samples per class\n",
"samples_per_class = {label: len(set(indexes)) for label, indexes in samples_per_class.items()}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -180,7 +126,7 @@
"*** instances ***\n",
"************************\n",
"> monorail: 1,861 - 34%\n",
"> person: 2,097 - 38%\n",
"> person: 2,071 - 38%\n",
"> forklift: 1,535 - 28%\n",
"\n",
"************************\n",
Expand All @@ -198,14 +144,14 @@
" - p80: 3.376\n",
" - p90: 5.129\n",
"> person\n",
" - p10: 0.315\n",
" - p20: 0.385\n",
" - p30: 0.467\n",
" - p40: 0.557\n",
" - p10: 0.318\n",
" - p20: 0.388\n",
" - p30: 0.471\n",
" - p40: 0.559\n",
" - p50: 0.662\n",
" - p60: 0.781\n",
" - p70: 0.955\n",
" - p80: 1.277\n",
" - p60: 0.788\n",
" - p70: 0.972\n",
" - p80: 1.325\n",
" - p90: 2.571\n",
"> forklift\n",
" - p10: 0.461\n",
Expand All @@ -221,6 +167,37 @@
}
],
"source": [
"# for each class initialize counters for samples (images), instances (objects) and boxes aspect ratios (width / height)\n",
"# storing samples indexes per class and then counting the number of unique indexes it's a simple way to count samples per class\n",
"samples_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"instances_per_class = {label: 0 for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"boxes_aspect_ratios_per_class = {label: [] for label in LABELS_CODES if label != LABEL_CODE_BACKGROUND}\n",
"\n",
"# for each file count number of samples per class and images per class\n",
"for i, path_file_labels_boxes in enumerate(PATH_FILES_LABELS_BOXES):\n",
"\n",
" # read ground truth labels and boxes\n",
" with open(path_file_labels_boxes, 'r') as f:\n",
" for label, xmin, ymin, xmax, ymax in csv.reader(f):\n",
"\n",
" # format ground truth data\n",
" label = int(label)\n",
" width = float(xmax) - float(xmin) + 1.0\n",
" height = float(ymax) - float(ymin) + 1.0 \n",
"\n",
" # add indexes for count samples later on\n",
" samples_per_class[label].append(i)\n",
"\n",
" # increment instances counter\n",
" instances_per_class[label] += 1\n",
"\n",
" # add aspect ratio to the list\n",
" boxes_aspect_ratios_per_class[label].append(width / height)\n",
"\n",
"\n",
"# calculate the number of samples per class\n",
"samples_per_class = {label: len(set(indexes)) for label, indexes in samples_per_class.items()}\n",
"\n",
"# print samples\n",
"total_samples = sum(samples_per_class.values())\n",
"print('\\n************************')\n",
Expand Down Expand Up @@ -274,7 +251,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.9"
},
"orig_nbformat": 4
},
Expand Down
File renamed without changes.
Loading

0 comments on commit 86b60df

Please sign in to comment.