-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathData.py
More file actions
79 lines (70 loc) · 2.63 KB
/
Data.py
File metadata and controls
79 lines (70 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Class containing the data and functions to load/create batch
# Assumes downsampled data as found in http://www.image-net.org/
import numpy as np
import pickle
def unpickle(file):
import pickle
with open(file, 'rb') as fo:
dict = pickle.load(fo, encoding='bytes')
return dict
class Data:
def __init__(self, setname, location, eval_size=0, load=True, ratio=1.0, preload=True, reclass='n'):
self.setname = setname
self.preload = preload
if self.setname=='ImageNet32':
self.c = 3
self.h = 32
self.w = 32
self.ncl = 1000
self.norm = 1./255
elif self.setname=='ImageNet64':
self.c = 3
self.h = 64
self.w = 64
self.ncl = 1000
self.norm = 1./255
else:
print("Dataset unknown!")
rcpar = reclass.split(',')
if rcpar[0]=='m' or rcpar[0]=='f':
self.ncl = int(rcpar[1])
if load:
self.train_size = int(ratio*1281167)
tmp = unpickle(location+"/train_data_batch_1") #~128k data per batch
raw_t_in = tmp['data']
self.t_lab = np.asarray(tmp['labels'])-1
self.mean_in = tmp['mean'].astype(np.float32)
s = 2
while len(self.t_lab)<self.train_size:
tmp = unpickle(location+"/train_data_batch_"+str(s))
raw_t_in = np.append(raw_t_in, tmp['data'], axis=0)
self.t_lab = np.append(self.t_lab, np.asarray(tmp['labels'])-1, axis=0)
s += 1
raw_t_in = raw_t_in[:self.train_size]
self.t_lab = self.t_lab[:self.train_size]
if self.preload:
self.t_in = np.reshape(self.norm*(np.asarray(raw_t_in, dtype=np.float32)-self.mean_in), (-1,self.c,self.h,self.w))
else:
self.t_in = raw_t_in
tmp = unpickle(location+"/val_data") #50k test data
self.e_in = np.reshape(self.norm*(np.asarray(tmp['data'][:eval_size], dtype=np.float32)-self.mean_in), (-1,self.c,self.h,self.w))
self.e_lab = np.asarray(tmp['labels'][:eval_size])-1
if rcpar[0]=='m':
self.e_lab = self.e_lab%self.ncl
self.t_lab = self.t_lab%self.ncl
elif rcpar[0]=='f':
indmap = np.loadtxt(rcpar[2], delimiter=',', dtype=int)
if np.max(indmap)>self.ncl-1:
print("More classes than expected!")
exit()
self.e_lab = indmap[self.e_lab]
self.t_lab = indmap[self.t_lab]
return
def getbatch(self, batch_size):
batch_ind = np.random.choice(self.train_size, batch_size)
batch_lab = self.t_lab[batch_ind]
if self.preload:
batch_in = self.t_in[batch_ind]
else:
batch_in = np.reshape(self.norm*(self.t_in[batch_ind].astype(np.float32)-self.mean_in), (-1,self.c,self.h,self.w))
return batch_in, batch_lab