-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_compute_image_metrics.py
91 lines (81 loc) · 4.08 KB
/
train_compute_image_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from PIL import Image
import sys
import os
from os import listdir
import numpy
import find_lines
import json
import io
# Presumably 8-bit grayscale values for black and white pixels; neither name is
# referenced by the code in this file -- TODO confirm whether they are still needed.
black = 0
white = 255
# Length that every per-image histogram is resized to, and the number of
# columns in the x/y metrics matrices.
metrics_array_size = 20
# The metrics matrices are allocated with matrix_size + 1 rows; a row is
# selected by the number parsed from a training folder's name.
matrix_size = 9
def output_json(matrix, output_file_name):
    """Print and save every row of *matrix* except row 0 as indented JSON.

    The result is a JSON object that maps each row index (1 and up) to that
    row's values as a list.

    Args:
        matrix: array whose first axis indexes the rows; it must expose
            ``shape`` and its rows must expose ``tolist()``.
        output_file_name: path of the file to write (overwritten if present).
    """
    # Row 0 is intentionally left out of the export.
    output = {
        number: matrix[number].tolist()
        for number in range(1, matrix.shape[0])
    }
    # Serialize once and reuse the text for both the console and the file.
    text = json.dumps(output, indent=4)
    print(text)
    # The context manager closes the file even if the write fails.
    with open(output_file_name, 'w') as output_file:
        output_file.write(text)
def main():
    """Build per-number pixel-distribution metrics from labelled training images.

    Command line:
        argv[1]: folder containing one sub-folder per number; each sub-folder
            is named with the number shown in its images.
        argv[2]: folder that receives ``x.json`` and ``y.json``.

    For every image, find_lines.count_pixels() yields an x and a y histogram,
    and find_lines.morph_array_to_size() resizes each one to
    metrics_array_size entries. The resized histograms are summed per number,
    each per-number total is then scaled so that it sums to 1, and both
    matrices are written with output_json().

    Raises:
        Exception: if fewer than two command-line arguments are supplied.
    """
    if len(sys.argv) < 3:
        raise Exception("Input parameter 1: folder of folders of training data images. Input parameter 2: folder for JSON output representing image metrics.")
    # input folder of images organized into folders named by the number in the image
    input_folder = sys.argv[1]
    # output folder for json metrics
    output_folder = sys.argv[2]
    # matrices to keep totals for where bits are found in the example images on both x and y axes
    x_metrics = numpy.zeros((matrix_size + 1, metrics_array_size), dtype=float)
    y_metrics = numpy.zeros((matrix_size + 1, metrics_array_size), dtype=float)
    for image_folder_name in listdir(input_folder):
        # os.path.join works whether or not the folder argument ends with a separator
        image_folder = os.path.join(input_folder, image_folder_name)
        if not os.path.isdir(image_folder):
            continue
        print("Processing: %s..." % image_folder_name)
        # current_number is the number in the images in this folder
        try:
            current_number = int(image_folder_name)
        except ValueError:
            # A stray directory must not abort the whole training run.
            print("Skipping, folder name is not a number.")
            continue
        if not 0 <= current_number <= matrix_size:
            # Only rows 0..matrix_size exist in the metrics matrices.
            print("Skipping, number is outside 0-%s." % matrix_size)
            continue
        # Row views: writing to them updates x_metrics / y_metrics in place.
        x_row = x_metrics[current_number]
        y_row = y_metrics[current_number]
        for image_file in listdir(image_folder):
            image_file_extension = os.path.splitext(image_file)[1]
            if image_file_extension == '' or image_file.startswith('.'):
                print("Skipping, not an image file.")
                continue
            # The context manager releases the image file once it has been counted.
            with Image.open(os.path.join(image_folder, image_file)) as image:
                # arrays to hold the x and y axis totals for this image
                x_histogram, y_histogram = find_lines.count_pixels(image)
            # because images come in different sizes we need to normalize the length of the array
            # of counts to be a constant
            normalized_x = find_lines.morph_array_to_size(x_histogram, metrics_array_size)
            normalized_y = find_lines.morph_array_to_size(y_histogram, metrics_array_size)
            print("%s:\tx: %s, %s\ty: %s, %s" % (image_file, sum(x_histogram), sum(normalized_x),
                                                 sum(y_histogram), sum(normalized_y)))
            print("x:\t%s" % normalized_x)
            print("y:\t%s" % normalized_y)
            # now add totals from this image to totals
            for n in range(metrics_array_size):
                x_row[n] += normalized_x[n]
                y_row[n] += normalized_y[n]
            print("x %s total:\t%s" % (current_number, x_row))
            print("y %s total:\t%s" % (current_number, y_row))
            print("Finished image %s" % image_file)
        print("Finished: %s" % image_folder)
        # Now that we have counts for all images of this number, we need to compute the % distribution
        print("x:")
        print(x_row)
        print("y:")
        print(y_row)
        for row in (x_row, y_row):
            total = sum(row)
            # A folder without usable images sums to 0; leave its row as zeros
            # instead of dividing by zero and writing NaN into the JSON.
            if total != 0:
                row /= total
        print("x:")
        print(x_row)
        print("y:")
        print(y_row)
    output_json(x_metrics, os.path.join(output_folder, "x.json"))
    output_json(y_metrics, os.path.join(output_folder, "y.json"))
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()