-
Notifications
You must be signed in to change notification settings - Fork 84
/
Copy pathipu_automl_io.py
165 lines (137 loc) · 6.16 KB
/
ipu_automl_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
# Copyright 2020 Google Research. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# The following functions are modified from original AutoML source.
# Changes are identified within the function docstring.
import os
from typing import Tuple
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers
import dataloader
import utils
from tf2 import label_util, postprocess
from visualize import vis_utils
def visualize_image(
image,
boxes,
classes,
scores,
label_map=None,
min_score_thresh=0.01,
max_boxes_to_draw=1000,
line_thickness=2,
**kwargs,
):
"""Visualizes a given image.
Original implementation in tf2/inference.py
Args:
image: a image with shape [H, W, C].
boxes: a box prediction with shape [N, 4] ordered [ymin, xmin, ymax, xmax].
classes: a class prediction with shape [N].
scores: A list of float value with shape [N].
label_map: a dictionary from class id to name.
min_score_thresh: minimal score for showing. If claass probability is below
this threshold, then the object will not show up.
max_boxes_to_draw: maximum bounding box to draw.
line_thickness: how thick is the bounding box line.
**kwargs: extra parameters.
Returns:
output_image: an output image with annotated boxes and classes.
"""
label_map = label_util.get_label_map(label_map or "coco")
category_index = {k: {"id": k, "name": label_map[k]} for k in label_map}
img = np.array(image)
vis_utils.visualize_boxes_and_labels_on_image_array(
img,
boxes,
classes,
scores,
category_index,
min_score_thresh=min_score_thresh,
max_boxes_to_draw=max_boxes_to_draw,
line_thickness=line_thickness,
**kwargs,
)
return img
def preprocess_resize(imgs: tf.Tensor, image_size: Tuple):
"""Change from original model: Split preprocessing - resize off device, store as uint8,
normalise on-device, cast to float. This function only handles the resize/crop.
Modified from the original implementation: keras/efficientdet_keras.py: EfficientDetModel._preprocessing."""
image_size = utils.parse_image_size(image_size)
def map_fn(image):
input_processor = dataloader.DetectionInputProcessor(image, image_size)
input_processor.set_scale_factors_to_output_size()
image = input_processor.resize_and_crop_image()
image_scale = input_processor.image_scale_to_original
image = tf.cast(image, tf.uint8)
return image, image_scale
if imgs.shape.as_list()[0]: # fixed batch size.
micro_batch_size = imgs.shape.as_list()[0]
outputs = [map_fn(imgs[i]) for i in range(micro_batch_size)]
return [tf.stack(y) for y in zip(*outputs)]
# otherwise treat it as dynamic batch size.
return tf.vectorized_map(map_fn, imgs)
def preprocess_normalize_image(img: tf.Tensor, img_dtype: tf.DType):
"""Normalize the image to zero mean and unit variance.
Original implementation in dataloader.py: InputProcessor"""
# The image normalization is identical to Cloud TPU ResNet.
# Equivalent of tf.image.convert_image_dtype from uint8 to float
img = tf.cast(img, dtype=img_dtype)
img /= 255.0
offset = tf.constant([0.485, 0.456, 0.406], dtype=img_dtype)
offset = tf.broadcast_to(offset, tf.shape(img))
img -= offset
scale = tf.constant([0.229, 0.224, 0.225], dtype=img_dtype)
scale = tf.broadcast_to(scale, tf.shape(img))
img /= scale
return img
def postprocess_predictions(config, cls_outputs, box_outputs, scales, mode="global"):
"""Postprocess class and box predictions.
Modified from the original implementation: keras/efficientdet_keras.py: EfficientDetModel._postprocessing."""
if not mode:
return cls_outputs, box_outputs
# TODO(tanmingxing): remove this cast once FP16 works postprocessing.
cls_outputs = [tf.cast(i, tf.float32) for i in cls_outputs]
box_outputs = [tf.cast(i, tf.float32) for i in box_outputs]
if mode == "global":
return postprocess.postprocess_global(config.as_dict(), cls_outputs, box_outputs, scales)
if mode == "per_class":
return postprocess.postprocess_per_class(config.as_dict(), cls_outputs, box_outputs, scales)
raise ValueError("Unsupported postprocess mode {}".format(mode))
def visualise_detections(args, config, imgs, inputs, det_outputs, batch_num=0):
"""Modified from the original implementation: inference.py."""
boxes, scores, classes, valid_len = det_outputs
os.makedirs(args.output_dir, exist_ok=True)
start_idx = batch_num * args.micro_batch_size
end_idx = start_idx + args.micro_batch_size + 1
for i, img in enumerate(imgs[start_idx:end_idx]):
length = valid_len[i]
img = visualize_image(
img,
boxes[i].numpy()[:length],
classes[i].numpy().astype(np.int)[:length],
scores[i].numpy()[:length],
label_map=config.label_map,
min_score_thresh=config.nms_configs.score_thresh,
max_boxes_to_draw=config.nms_configs.max_output_size,
)
def get_filename(x):
return os.path.join(args.output_dir, f"{args.model_name}_{config.image_size}_{x}_b{batch_num}_img{i}.jpg")
Image.fromarray(img.astype(np.uint8)).save(get_filename("output"))
if inputs is not None:
Image.fromarray(inputs[i].numpy().astype(np.uint8)).save(get_filename("input"))
print(f"Writing annotated image to {get_filename('output')}")