This repository has been archived by the owner on Aug 30, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtriplet_cub_loader.py
131 lines (113 loc) · 5.4 KB
/
triplet_cub_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from __future__ import print_function
import torch.utils.data as data
import os
import math
import errno
import torch
import json
import codecs
import numpy as np
import csv
import pandas as pd
from PIL import Image
import hard_mining
# Listing of the dataset root directory (presumably the layout this loader
# was written against):
#   README  bounding_boxes.txt  image_class_labels.txt  images.txt
#   shell_commands.txt  test_idx.txt  train_idx.txt  attributes  classes.txt
#   images  parts  test_class_label.txt  train_class_label.txt
#   train_test_split.txt
def default_image_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and closed as soon as the pixel data has
    been decoded, so the handle is released deterministically instead of
    relying on Pillow's lazy loading / garbage collection.
    """
    with open(path, 'rb') as handle:
        # convert() forces a full decode and returns an independent image,
        # so the result stays valid after the file is closed.
        return Image.open(handle).convert('RGB')
class CUB_t(data.Dataset):
    """Dataset of image triplets built from a CUB-style image directory.

    Each item is a triplet of images plus the three image ids it was built
    from.  Triplets are stored as ``(anchor, far, close)``: ``anchor`` and
    ``close`` share a class label, ``far`` comes from a different class.
    """

    training_file = 'train_idx.txt'
    test_file = 'test_idx.txt'
    train_class_label_file = 'train_class_label.txt'
    test_class_label_file = 'test_class_label.txt'

    def __init__(self, root, n_train_triplets=50000, n_test_triplets=10000,
                 num_classes=-1, train=True, transform=None,
                 target_transform=None, download=False):
        """Read the image list and class labels, then sample the triplets.

        Args:
            root: dataset directory containing ``images.txt``, the
                ``images`` folder and the class-label files.
            n_train_triplets: number of triplets requested when ``train``
                is true.
            n_test_triplets: number of triplets requested when ``train``
                is false.
            num_classes: number of classes to sample anchors from; a
                negative value means 200, larger values are capped at 200.
            train: selects the training split (True) or test split (False).
            transform: optional callable applied to every loaded image.
            target_transform: accepted but not used by this class.
            download: accepted but not used by this class.
        """
        self.loader = default_image_loader
        self.root = root
        self.transform = transform
        self.train = train  # training set or test set
        self.im_base_path = os.path.join(root, 'images')
        self.im_paths = pd.read_csv(os.path.join(root, 'images.txt'),
                                    names=['idx', 'path'],
                                    sep=" ")['path'].tolist()

        if num_classes < 0:
            self.num_classes = 200
        else:
            self.num_classes = min(num_classes, 200)

        # Both splits use the same two-column "idx label" file format.
        if self.train:
            label_file = self.train_class_label_file
        else:
            label_file = self.test_class_label_file
        df = pd.read_csv(os.path.join(root, label_file),
                         names=['idx', 'labels'], sep=" ")
        ids = df['idx'].tolist()
        labels = df['labels'].tolist()

        if self.train:
            self.train_idx = ids
            self.train_labels = labels
            self.triplets_train = self.make_triplet_list(n_train_triplets)
        else:
            self.test_idx = ids
            self.test_labels = labels
            self.triplets_test = self.make_triplet_list(n_test_triplets)

    def __getitem__(self, index):
        """Return ``(img1, img2, img3, idx1, idx2, idx3)`` for one triplet.

        The images are loaded, resized to 64x64 and passed through
        ``self.transform`` when one was given.
        """
        if self.train:
            idx1, idx2, idx3 = self.triplets_train[index]
        else:
            idx1, idx2, idx3 = self.triplets_test[index]

        images = []
        for idx in (idx1, idx2, idx3):
            # NOTE(review): ids are used directly as 0-based positions into
            # im_paths -- confirm the ids in the label files are 0-based.
            img = self.loader(os.path.join(self.im_base_path,
                                           self.im_paths[idx]))
            img = img.resize((64, 64))
            if self.transform is not None:
                img = self.transform(img)
            images.append(img)

        img1, img2, img3 = images
        return img1, img2, img3, idx1, idx2, idx3

    def __len__(self):
        """Return the number of triplets in the active split."""
        if self.train:
            return len(self.triplets_train)
        return len(self.triplets_test)

    def make_triplet_list(self, ntriplets):
        """Sample ``int(ntriplets / num_classes)`` triplets per class.

        For every class label ``1 .. num_classes`` an anchor and a distinct
        same-class sample are drawn with replacement, together with a sample
        whose label differs from the anchor's.

        Args:
            ntriplets: total number of triplets requested; the remainder of
                the division by ``num_classes`` is dropped.

        Returns:
            A list of ``(anchor_id, far_id, close_id)`` tuples, where the
            ids come from the active split's ``*_idx`` list.

        Raises:
            ValueError: if a class has fewer than two samples (no distinct
                same-class pair exists) or numpy cannot draw the samples.
        """
        print('Processing Triplet Generation ...')
        if self.train:
            np_labels = np.array(self.train_labels)
            ids = self.train_idx
        else:
            np_labels = np.array(self.test_labels)
            ids = self.test_idx

        triplets = []
        nc = int(self.num_classes)
        per_class = int(ntriplets / nc) if nc > 0 else 0

        if per_class != 0:
            for class_idx in range(1, nc + 1):
                # Positions (into np_labels) of this class and of all others.
                members = np.where(np_labels == class_idx)[0]
                others = np.where(np_labels != class_idx)[0]
                if members.size < 2:
                    raise ValueError(
                        'class %d has %d sample(s); at least 2 are needed '
                        'to form an anchor/same-class pair'
                        % (class_idx, members.size))

                anchors = np.random.choice(members, per_class, replace=True)
                close = np.random.choice(members, per_class, replace=True)
                # Redraw only the positions where the same-class sample
                # equals its anchor.  Reshuffling the whole array until no
                # position collides may never terminate.
                clash = anchors == close
                while clash.any():
                    close[clash] = np.random.choice(
                        members, int(clash.sum()), replace=True)
                    clash = anchors == close

                far = np.random.choice(others, per_class, replace=True)

                # Tuple order is (anchor, different-class, same-class); it is
                # kept as-is because consumers index triplets by position.
                triplets.extend(
                    (ids[a_pos], ids[far_pos], ids[close_pos])
                    for a_pos, far_pos, close_pos in zip(
                        anchors.tolist(), far.tolist(), close.tolist()))

        print('Done!')
        return triplets  # save the triplets to class

    def regenerate_triplet_list(self, ntriplets, sampler, num_hard):
        """Rebuild the training triplets from random and hard triplets.

        Args:
            ntriplets: total number of triplets wanted.
            sampler: object whose ``ChooseNegatives(n)`` returns a list of
                triplets that can be concatenated with the random ones.
            num_hard: requested number of hard triplets; it is increased so
                that the random part is a multiple of ``num_classes``.

        Only valid on the training split.
        """
        assert(self.train)
        print("Processing Triplet Regeneration ...")
        num_random_triplets = ntriplets - num_hard
        # Round the random part DOWN to a multiple of num_classes (int()
        # truncates the quotient); the hard triplets absorb the remainder.
        per_class = int(math.ceil(num_random_triplets) / self.num_classes)
        num_random_triplets = per_class * self.num_classes
        num_hard = ntriplets - num_random_triplets
        print("Number of hard triplets %d ..." % num_hard)
        print("Number of total triplets %d ..." % ntriplets)
        neg_hard_triplets = sampler.ChooseNegatives(num_hard)
        random_triplets = self.make_triplet_list(num_random_triplets)
        self.triplets_train = random_triplets + neg_hard_triplets
        np.random.shuffle(self.triplets_train)