Skip to content

Commit

Permalink
Updated train and generate_embeddings script which are now compatible.
Browse files Browse the repository at this point in the history
Former-commit-id: 491da19
  • Loading branch information
ldulcic committed Nov 11, 2019
1 parent c885f8d commit dd1fa61
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 12 deletions.
59 changes: 48 additions & 11 deletions training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,34 @@
from PIL import Image
from torchvision import transforms, datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from face_recognition import preprocessing, FaceFeaturesExtractor, FaceRecogniser


MODEL_DIR_PATH = 'model'


def parse_args():
    """Parse and validate command-line arguments for the training script.

    At least one data source must be supplied: a folder of images
    (``--dataset-path``) or pre-generated embeddings (``--embeddings-path``).
    When embeddings are supplied, ``--labels-path`` and
    ``--class-to-idx-path`` are required as well.

    Returns:
        argparse.Namespace with ``dataset_path``, ``embeddings_path``,
        ``labels_path``, ``class_to_idx_path`` and ``grid_search``.

    Raises:
        SystemExit: via ``parser.error`` (exit status 2) when no data source
            is given or the embeddings options are incomplete.
    """
    parser = argparse.ArgumentParser(
        description='Script for training Face Recognition model. You can either give path to dataset or provide path '
                    'to pre-generated embeddings, labels and class_to_idx. You can pre-generate this with '
                    'util/generate_embeddings.py script.')
    parser.add_argument('-d', '--dataset-path', help='Path to folder with images.')
    parser.add_argument('-e', '--embeddings-path', help='Path to file with embeddings.')
    parser.add_argument('-l', '--labels-path', help='Path to file with labels.')
    parser.add_argument('-c', '--class-to-idx-path', help='Path to pickled class_to_idx dict.')
    parser.add_argument('--grid-search', action='store_true',
                        help='If this option is enabled, grid search will be performed to estimate C parameter of '
                             'Logistic Regression classifier. In order to use this option you have to have at least '
                             '3 examples of every class in your dataset. It is recommended to enable this option.')
    args = parser.parse_args()

    # Fail fast with a usage message instead of an obscure error deep inside
    # the loading code when a required path is missing.
    if args.embeddings_path:
        missing = [
            flag
            for flag, value in (('--labels-path', args.labels_path),
                                ('--class-to-idx-path', args.class_to_idx_path))
            if not value
        ]
        if missing:
            parser.error('--embeddings-path also requires: ' + ', '.join(missing))
    elif not args.dataset_path:
        parser.error('either --dataset-path or --embeddings-path must be provided')

    return args


def dataset_to_embeddings(dataset, features_extractor):
transform = transforms.Compose([
preprocessing.ExifOrientationNormalize(),
transforms.Resize(1024)
])
preprocessing.ExifOrientationNormalize(),
transforms.Resize(1024)
])

embeddings = []
labels = []
Expand All @@ -40,17 +51,43 @@ def dataset_to_embeddings(dataset, features_extractor):
return np.stack(embeddings), labels


def load_data(args, features_extractor):
    """Load training embeddings, labels and the class-to-index mapping.

    If ``args.embeddings_path`` is set, everything is read from the
    pre-generated files; otherwise embeddings are computed from the image
    folder at ``args.dataset_path``.

    Args:
        args: Parsed command-line namespace providing ``embeddings_path``,
            ``labels_path``, ``class_to_idx_path`` and ``dataset_path``.
        features_extractor: Passed through to ``dataset_to_embeddings``;
            unused when pre-generated embeddings are loaded.

    Returns:
        Tuple ``(embeddings, labels, class_to_idx)``.

    Raises:
        ValueError: If the paths required for the selected data source are
            missing.
    """
    if args.embeddings_path:
        if not (args.labels_path and args.class_to_idx_path):
            raise ValueError(
                'labels_path and class_to_idx_path are required when embeddings_path is given.')

        # ndmin keeps the result shapes stable for single-sample files:
        # without it one embedding row collapses to 1-D and one label becomes
        # a 0-d array whose tolist() is a bare str instead of a list.
        embeddings = np.loadtxt(args.embeddings_path, ndmin=2)
        labels = np.loadtxt(args.labels_path, dtype='str', ndmin=1).tolist()
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load class_to_idx files from a trusted source.
        class_to_idx = joblib.load(args.class_to_idx_path)
        return embeddings, labels, class_to_idx

    if not args.dataset_path:
        raise ValueError('Either dataset_path or embeddings_path must be provided.')

    dataset = datasets.ImageFolder(args.dataset_path)
    embeddings, labels = dataset_to_embeddings(dataset, features_extractor)
    return embeddings, labels, dataset.class_to_idx


def train(args, embeddings, labels):
    """Fit a multinomial logistic regression classifier on the embeddings.

    When ``args.grid_search`` is truthy, the ``C`` parameter is selected by a
    3-fold grid search and the best estimator found is returned; otherwise
    the classifier is fitted directly with ``C=10``.

    Args:
        args: Namespace providing the ``grid_search`` flag.
        embeddings: Training features passed to ``fit``.
        labels: Training targets passed to ``fit``.

    Returns:
        The fitted classifier.
    """
    base_classifier = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=10, max_iter=10000)

    if not args.grid_search:
        base_classifier.fit(embeddings, labels)
        return base_classifier

    candidate_c_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    search = GridSearchCV(
        estimator=base_classifier,
        param_grid={'C': candidate_c_values},
        cv=3
    )
    search.fit(embeddings, labels)
    return search.best_estimator_


def main():
args = parse_args()

features_extractor = FaceFeaturesExtractor()
dataset = datasets.ImageFolder(args.dataset_path)
embeddings, labels = dataset_to_embeddings(dataset, features_extractor)
embeddings, labels, class_to_idx = load_data(args, features_extractor)
clf = train(args, embeddings, labels)

clf = LogisticRegression(C=10, solver='lbfgs', multi_class='multinomial')
clf.fit(embeddings, labels)
idx_to_class = {v: k for k, v in class_to_idx.items()}

idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}
target_names = map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0]))
print(metrics.classification_report(labels, clf.predict(embeddings), target_names=list(target_names)))

if not os.path.isdir(MODEL_DIR_PATH):
os.mkdir(MODEL_DIR_PATH)
Expand Down
17 changes: 16 additions & 1 deletion util/generate_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import os
import joblib
import numpy as np
import torch
from torchvision import datasets
Expand All @@ -10,14 +11,26 @@
def parse_args():
    """Parse command-line arguments for the embedding generation script.

    Returns:
        argparse.Namespace with ``input_folder`` and ``output_folder``, both
        of which are required options.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # would replace the program name in the usage line with this paragraph.
    parser = argparse.ArgumentParser(
        description="Script for generating face embeddings. Output of this script is 'embeddings.txt' which contains "
                    "embeddings for all input images, 'labels.txt' which contains label for every embedding and "
                    "'class_to_idx.pkl' which is serializes dictionary which maps classes to its index.")
    parser.add_argument('--input-folder', required=True,
                        help='Root folder where images are. This folder contains sub-folders for each class.')
    parser.add_argument('--output-folder', required=True,
                        help='Output folder where image embeddings and labels will be saved.')
    return parser.parse_args()


def normalise_string(string):
    """Return ``string`` lower-cased with every space replaced by an underscore."""
    lowered = string.lower()
    # Splitting on an explicit single space keeps empty pieces, so runs of
    # spaces turn into runs of underscores.
    return '_'.join(lowered.split(' '))


def normalise_dict_keys(dictionary):
    """Return a new dict whose keys are passed through ``normalise_string``.

    Values are kept unchanged and the input dict is not modified. If two keys
    normalise to the same string, the value of the later one wins.
    """
    return {normalise_string(name): value for name, value in dictionary.items()}


def main():
torch.set_grad_enabled(False)
args = parse_args()
Expand All @@ -26,11 +39,13 @@ def main():
dataset = datasets.ImageFolder(args.input_folder)
embeddings, labels = dataset_to_embeddings(dataset, features_extractor)

dataset.class_to_idx = normalise_dict_keys(dataset.class_to_idx)
idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}
labels = list(map(lambda idx: idx_to_class[idx], labels))

np.savetxt(args.output_folder + os.path.sep + 'embeddings.txt', embeddings)
np.savetxt(args.output_folder + os.path.sep + 'labels.txt', np.array(labels, dtype=np.str).reshape(-1, 1), fmt="%s")
joblib.dump(dataset.class_to_idx, args.output_folder + os.path.sep + 'class_to_idx.pkl')


if __name__ == '__main__':
Expand Down

0 comments on commit dd1fa61

Please sign in to comment.