diff --git a/README.imports b/README.imports new file mode 100644 index 00000000..bb2ff9c4 --- /dev/null +++ b/README.imports @@ -0,0 +1,7 @@ +Importing COCO annotations from other sources can be very cumbersome. Relative pathnames are often encouraged (apparently in the coco-annotator too); however, importing or exporting removes that relationship. +A new attribute, 'relpath', has been added to the image catalogue (ImageModel) to capture the relative path of an image at import time as well as when scanning. If there are no sub-directories this attribute will be equal to the file_name; however, the search logic will use the relative pathname to discover the image. +When exporting, the relative path will replace the 'file_name' entry in the 'image' dictionary. This behaviour can be switched off by setting the 'EXPORT_RELPATH' environment variable to false (in the compose file for the workers). +If the annotation only contains a bbox and no valid segmentation, the segmentation is derived from the bbox. Previously this annotation would have been ignored. 
+ + Uwe Rosebrock July 2021 + \ No newline at end of file diff --git a/backend/config/config.py b/backend/config/config.py index 8d27a2cc..6abcfb31 100644 --- a/backend/config/config.py +++ b/backend/config/config.py @@ -50,6 +50,8 @@ class Config: WORKER_CONNECTIONS = 1000 TESTING = os.getenv("TESTING", False) + + EXPORT_RELPATH = os.getenv("EXPORT_RELPATH", True) ### Workers CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "amqp://user:password@messageq:5672//") diff --git a/backend/database/images.py b/backend/database/images.py index b5b0fade..c458d071 100644 --- a/backend/database/images.py +++ b/backend/database/images.py @@ -1,14 +1,14 @@ import os import imantics as im - +from pathlib import PurePath from PIL import Image, ImageFile from mongoengine import * from .events import Event, SessionEvent from .datasets import DatasetModel from .annotations import AnnotationModel - +from config import Config ImageFile.LOAD_TRUNCATED_IMAGES = True @@ -16,7 +16,7 @@ class ImageModel(DynamicDocument): COCO_PROPERTIES = ["id", "width", "height", "file_name", "path", "license",\ - "flickr_url", "coco_url", "date_captured", "dataset_id"] + "flickr_url", "coco_url", "date_captured", "dataset_id","relpath","num_annotations"] # -- Contants THUMBNAIL_DIRECTORY = '.thumbnail' @@ -35,6 +35,7 @@ class ImageModel(DynamicDocument): # Absolute path to image file path = StringField(required=True, unique=True) + relpath = StringField(required=True, unique=True) width = IntField(required=True) height = IntField(required=True) file_name = StringField() @@ -74,16 +75,21 @@ def create_from_path(cls, path, dataset_id=None): if dataset_id is not None: image.dataset_id = dataset_id + dataset = DatasetModel.objects.get(id=dataset_id) + directory = os.path.join(Config.DATASET_DIRECTORY, dataset.name) else: # Get dataset name from path folders = path.split('/') + i = folders.index("datasets") dataset_name = folders[i+1] - + directory = os.path.join(*folders[:i+2]) dataset = 
DatasetModel.objects(name=dataset_name).first() if dataset is not None: image.dataset_id = dataset.id - + + # UR added relpath + image.relpath = str(PurePath(image.path).relative_to(directory)) pil_image.close() return image diff --git a/backend/database/tasks.py b/backend/database/tasks.py index 7a9e622a..8286af71 100644 --- a/backend/database/tasks.py +++ b/backend/database/tasks.py @@ -40,6 +40,10 @@ class TaskModel(DynamicDocument): _update_every = 10 _progress_update = 0 + + def debug(self, string): + self._log(string, level="DEBUG") + def error(self, string): self._log(string, level="ERROR") diff --git a/backend/workers/tasks/data.py b/backend/workers/tasks/data.py index 0df11efe..f70fe0a0 100644 --- a/backend/workers/tasks/data.py +++ b/backend/workers/tasks/data.py @@ -18,8 +18,14 @@ from celery import shared_task from ..socket import create_socket from mongoengine import Q +from config import Config +from pathlib import PurePath +def bbox2seg(bbox): + return [bbox[0],bbox[1],bbox[0]+bbox[2],bbox[1],bbox[0]+bbox[2],bbox[1]+bbox[3],bbox[0],bbox[1]+bbox[3]] + + @shared_task def export_annotations(task_id, dataset_id, categories): @@ -74,7 +80,10 @@ def export_annotations(task_id, dataset_id, categories): total_images = db_images.count() for image in db_images: image = fix_ids(image) - + + if Config.EXPORT_RELPATH and 'relpath' in image: + image['file_name'] = image['relpath'] + progress += 1 task.set_progress((progress / total_items) * 100, socket=socket) @@ -103,7 +112,11 @@ def export_annotations(task_id, dataset_id, categories): num_annotations += 1 coco.get('annotations').append(annotation) - + ''' + if num_annotations > 0: + image["num_annotations"]=num_annotations + image["annotated"]=True + ''' task.info( f"Exporting {num_annotations} annotations for image {image.get('id')}") coco.get('images').append(image) @@ -135,7 +148,8 @@ def import_annotations(task_id, dataset_id, coco_json): task = TaskModel.objects.get(id=task_id) dataset = 
DatasetModel.objects.get(id=dataset_id) - + # UR added relpath + directory = os.path.join(Config.DATASET_DIRECTORY, dataset.name) task.update(status="PROGRESS") socket = create_socket() @@ -203,12 +217,12 @@ def import_annotations(task_id, dataset_id, coco_json): for image in coco_images: image_id = image.get('id') image_filename = image.get('file_name') - + # update progress progress += 1 task.set_progress((progress / total_items) * 100, socket=socket) - - image_model = images.filter(file_name__exact=image_filename).all() + # UR added relpath + image_model = images.filter(relpath=image_filename).all() if len(image_model) == 0: task.warning(f"Could not find image {image_filename}") @@ -239,11 +253,11 @@ def import_annotations(task_id, dataset_id, coco_json): progress += 1 task.set_progress((progress / total_items) * 100, socket=socket) - has_segmentation = len(segmentation) > 0 + has_segmentation = (len(segmentation) > 0 or isbbox) and sum(bbox) > 1 has_keypoints = len(keypoints) > 0 if not has_segmentation and not has_keypoints: task.warning( - f"Annotation {annotation.get('id')} has no segmentation or keypoints") + f"Annotation {annotation.get('id')} has no segmentation, bbox or keypoints") continue try: @@ -259,7 +273,8 @@ def import_annotations(task_id, dataset_id, coco_json): image_id=image_model.id, category_id=category_model_id, segmentation=segmentation, - keypoints=keypoints + keypoints=keypoints, + bbox = bbox ).first() if annotation_model is None: @@ -272,9 +287,14 @@ def import_annotations(task_id, dataset_id, coco_json): annotation_model.metadata = annotation.get('metadata', {}) if has_segmentation: + if len(segmentation) < 1 or len(segmentation[0]) < 1: ## we have an empty segment with a bbox + task.info(f"Creating segment from bbox {bbox}") + segmentation = [bbox2seg(bbox)] + isbbox = True + annotation_model.segmentation = segmentation - annotation_model.area = area - annotation_model.bbox = bbox + annotation_model.area = area + annotation_model.bbox 
= bbox if has_keypoints: annotation_model.keypoints = keypoints @@ -283,6 +303,7 @@ def import_annotations(task_id, dataset_id, coco_json): annotation_model.save() image_categories.append(category_id) + else: annotation_model.update(deleted=False, isbbox=isbbox) task.info( diff --git a/backend/workers/tasks/scan.py b/backend/workers/tasks/scan.py index df6efd7c..df5cb14e 100644 --- a/backend/workers/tasks/scan.py +++ b/backend/workers/tasks/scan.py @@ -7,7 +7,7 @@ from celery import shared_task from ..socket import create_socket from .thumbnails import thumbnail_generate_single_image - +from pathlib import PurePath import os @@ -22,36 +22,42 @@ def scan_dataset(task_id, dataset_id): directory = dataset.directory toplevel = list(os.listdir(directory)) - task.info(f"Scanning {directory}") + task.info(f"Scanning {directory} ") count = 0 for root, dirs, files in os.walk(directory): - + task.info(f"Scanning {directory} at {root}") try: - youarehere = toplevel.index(root.split('/')[-1]) - progress = int(((youarehere)/len(toplevel))*100) + if root in toplevel: + youarehere = toplevel.index(root.split('/')[-1]) + progress = int(((youarehere)/len(toplevel))*100) + else: + progress = len(toplevel)/100 + youarehere = root task.set_progress(progress, socket=socket) - except: - pass + except Exception as ee: + task.warning(f"Could not set progress {youarehere} because of {ee}") if root.split('/')[-1].startswith('.'): + task.debug(f"Ignoring hidden root: {root}") continue for file in files: path = os.path.join(root, file) - + relpath = str(PurePath(path).relative_to(directory)) if path.endswith(ImageModel.PATTERN): - db_image = ImageModel.objects(path=path).first() + db_image = ImageModel.objects(relpath=relpath).first() if db_image is not None: + task.debug(f"File already exists: {relpath}") continue try: ImageModel.create_from_path(path, dataset.id).save() count += 1 task.info(f"New file found: {path}") - except: - task.warning(f"Could not read {path}") + except Exception as 
e: + task.warning(f"Could not read {path} because of {e}") [thumbnail_generate_single_image.delay(image.id) for image in ImageModel.objects(regenerate_thumbnail=True).all()] diff --git a/build_local.sh b/build_local.sh new file mode 100755 index 00000000..82e2a9d2 --- /dev/null +++ b/build_local.sh @@ -0,0 +1,16 @@ +#!/bin/bash +if [[ "$1" == regi* ]];then + +docker build -t docker-registry.it.csiro.au/trike/uwer/coco-annotator:python-env -f ./backend/Dockerfile . +docker build -t docker-registry.it.csiro.au/trike/uwer/annotator_webclient -f Dockerfile . +docker build -t docker-registry.it.csiro.au/trike/uwer/annotator_workers -f ./backend/workers/Dockerfile . + +docker push docker-registry.it.csiro.au/trike/uwer/coco-annotator:python-env +docker push docker-registry.it.csiro.au/trike/uwer/annotator_webclient +docker push docker-registry.it.csiro.au/trike/uwer/annotator_workers +else +docker build -f ./backend/Dockerfile . -t uwer/coco-annotator:python-env +docker build . -t uwer/annotator_webserver +docker build -f ./backend/workers/Dockerfile . 
-t uwer/annotator_workers + +fi \ No newline at end of file diff --git a/docker-compose-local.yml b/docker-compose-local.yml new file mode 100644 index 00000000..0405bbb5 --- /dev/null +++ b/docker-compose-local.yml @@ -0,0 +1,61 @@ +version: "3" +volumes: + mongodb_data_uwer: + external: false + +networks: + cocoannotator_uwer: + external: false + +services: + webserver: + #image: uwer/coco-annotator + image: uwer/annotator_webserver + #:webserver-stable + container_name: annotator_webserver_uwer + restart: always + ports: + - "5050:5000" + environment: + - SECRET_KEY=RandomSecretKeyHere + - FILE_WATCHER=true + volumes: + - "./datasets:/datasets" + - "./models:/models" + depends_on: + - database + - workers + networks: + - cocoannotator_uwer + workers: + container_name: annotator_workers_uwer + image: uwer/annotator_workers + #:workers-stable + volumes: + - "./datasets:/datasets" + depends_on: + - messageq + - database + networks: + - cocoannotator_uwer + messageq: + image: rabbitmq:3 + container_name: annotator_message_q_uwer + environment: + - RABBITMQ_DEFAULT_USER=user + - RABBITMQ_DEFAULT_PASS=password + - EXPORT_RELPATH=true + networks: + - cocoannotator_uwer + database: + image: mongo:4.0 + container_name: annotator_mongodb_uwer + restart: always + environment: + - MONGO_DATA_DIR=/data/db + - MONGO_LOG_DIR=/dev/null + volumes: + - "mongodb_data_uwer:/data/db" + command: "mongod --smallfiles --logpath=/dev/null" + networks: + - cocoannotator_uwer