-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataset.py
More file actions
92 lines (80 loc) · 2.58 KB
/
dataset.py
File metadata and controls
92 lines (80 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from numpy import *
from os import listdir
from os.path import isfile
import os
from util import *
import struct
import io
class dataSet(object):
    """Labelled sample matrix that can be persisted to a binary file.

    File layout, as written by ``save`` and read by ``load`` (all fields are
    packed with ``struct`` in its default native mode):

    * sample count, packed with ``charCode``
    * vector length, packed with ``charCode``
    * for every sample: the label as ``H`` (unsigned 16-bit) followed by
      ``vector length`` components, each as ``B`` (unsigned 8-bit)

    Attributes:
        n: number of samples.
        mat: matrix holding one sample per row.
        labels: list of integer labels, parallel to the rows of ``mat``.
        datafile: path of the file used by ``load`` and ``save``.
        charCode: ``struct`` format character used for the two header fields.
    """

    n = 0
    # NOTE: these two class-level defaults are mutable and shared by every
    # instance that has not rebound them. load() therefore always rebinds
    # the attributes on the instance instead of mutating them in place.
    mat = []
    labels = []
    datafile = ""
    # L for windows; I for linux/unix
    charCode = "L" if os.name == "nt" else "I"

    def load(self):
        """Replace ``n``, ``mat`` and ``labels`` with the contents of ``datafile``.

        The instance is only updated once the whole file has been read, so a
        truncated or unreadable file leaves the previous state untouched.

        Raises:
            IOError: if ``datafile`` cannot be opened (``OSError`` on Python 3).
            struct.error: if the file ends before all announced data was read.
        """
        # Derive the header field width from the format character instead of
        # assuming 4 bytes, so an overridden charCode keeps working.
        headerSize = struct.calcsize(self.charCode)
        labelFmt = struct.Struct("H")
        with io.open(self.datafile, "rb") as f:
            count = struct.unpack(self.charCode, f.read(headerSize))[0]
            veclen = struct.unpack(self.charCode, f.read(headerSize))[0]
            rowFmt = struct.Struct("%dB" % veclen)
            loadedLabels = []
            loadedMat = zeros((count, veclen))
            for i in range(count):
                loadedLabels.append(labelFmt.unpack(f.read(labelFmt.size))[0])
                # One read per sample instead of one read per component.
                loadedMat[i] = rowFmt.unpack(f.read(rowFmt.size))
        self.n = count
        self.mat = loadedMat
        self.labels = loadedLabels

    def save(self):
        """Write the first ``n`` samples and their labels to ``datafile``.

        All data is packed in memory before the file is opened, so an invalid
        value does not truncate an existing file.

        Raises:
            struct.error: if a label does not fit ``H`` or a component does
                not fit ``B`` after conversion to ``int``.
            IndexError: if ``labels`` or ``mat`` hold fewer than ``n`` entries.
            IOError: if ``datafile`` cannot be written (``OSError`` on Python 3).
        """
        veclen = self._vectorLength()
        rowFmt = struct.Struct("%dB" % veclen)
        chunks = [
            struct.pack(self.charCode, self.n),
            struct.pack(self.charCode, veclen),
        ]
        for i in range(self.n):
            chunks.append(struct.pack("H", self.labels[i]))
            # Matrix elements are numpy floats; struct's integer formats
            # reject floats on Python 3, so convert each one explicitly.
            chunks.append(rowFmt.pack(*[int(v) for v in self.mat[i]]))
        with open(self.datafile, "wb") as f:
            f.write(b"".join(chunks))

    def _vectorLength(self):
        """Return the number of components per sample, also for an empty set."""
        shape = getattr(self.mat, "shape", None)
        if shape is not None and len(shape) == 2:
            # Keeps the column count of a 0-row matrix, where mat[0] would fail.
            return int(shape[1])
        return len(self.mat[0]) if len(self.mat) else 0
class trainingSet(dataSet):
    """Training samples, cached in ``trainingSet.dat``.

    On construction the cache file is loaded when it exists; otherwise the
    samples are read from the files in ``rawDir`` and the cache is written.

    Attributes:
        datafile: path of the cache file.
        rawDir: directory holding one file per raw sample. The integer before
            the first ``_`` of each file name is used as the sample's label.
    """

    datafile = "trainingSet.dat"
    rawDir = "trainingDigits"

    def __init__(self):
        """Load the cache file, or build it from ``rawDir`` when it is missing."""
        self.labels = []
        if isfile(self.datafile):
            self.load()
        else:
            self.loadRaw()
            self.save()

    def loadRaw(self):
        """Rebuild ``n``, ``mat`` and ``labels`` from the files in ``rawDir``.

        Directory entries whose name does not start with an integer label
        (for example ``Thumbs.db`` or ``.DS_Store``) are skipped. Entries are
        processed in sorted name order so repeated runs give the same result.

        Raises:
            OSError: if ``rawDir`` cannot be listed.
        """
        samples = []
        # listdir() returns names in arbitrary order; sort for reproducibility.
        for fileNameStr in sorted(listdir(self.rawDir)):
            try:
                classNum = int(fileNameStr.split('.')[0].split('_')[0])
            except ValueError:
                continue
            samples.append((classNum, fileNameStr))
        self.n = len(samples)
        # NOTE(review): img2vector comes from util; presumably it returns a
        # 1024-element vector per file -- confirm against util.
        self.mat = zeros((self.n, 1024))
        # Rebind instead of appending so a repeated call cannot leave labels
        # longer than mat.
        self.labels = [classNum for classNum, _ in samples]
        for i, (_, fileNameStr) in enumerate(samples):
            self.mat[i, :] = img2vector('%s/%s' % (self.rawDir, fileNameStr))

    def selfLearn(self, vec, label):
        """Append one labelled sample and rewrite the cache file.

        Args:
            vec: sample vector; stacked below ``mat`` with ``vstack``, so it
                must have as many components as ``mat`` has columns.
            label: integer label stored for the new sample.
        """
        self.mat = vstack((self.mat, vec))
        self.labels.append(label)
        self.n += 1
        self.save()
class testSet(dataSet):
    """Test samples, cached in ``testSet.dat``.

    On construction the cache file is loaded when it exists; otherwise the
    samples are read from the files in ``rawDir`` and the cache is written.

    Attributes:
        datafile: path of the cache file.
        rawDir: directory holding one file per raw sample. The integer before
            the first ``_`` of each file name is used as the sample's label.
    """

    datafile = "testSet.dat"
    rawDir = "testDigits"

    def __init__(self):
        """Load the cache file, or build it from ``rawDir`` when it is missing."""
        self.labels = []
        if isfile(self.datafile):
            self.load()
        else:
            self.loadRaw()
            self.save()

    def loadRaw(self):
        """Rebuild ``n``, ``mat`` and ``labels`` from the files in ``rawDir``.

        Directory entries whose name does not start with an integer label
        (for example ``Thumbs.db`` or ``.DS_Store``) are skipped. Entries are
        processed in sorted name order so repeated runs give the same result.

        Raises:
            OSError: if ``rawDir`` cannot be listed.
        """
        samples = []
        # listdir() returns names in arbitrary order; sort for reproducibility.
        for fileNameStr in sorted(listdir(self.rawDir)):
            try:
                classNum = int(fileNameStr.split('.')[0].split('_')[0])
            except ValueError:
                continue
            samples.append((classNum, fileNameStr))
        self.n = len(samples)
        # NOTE(review): img2vector comes from util; presumably it returns a
        # 1024-element vector per file -- confirm against util.
        self.mat = zeros((self.n, 1024))
        # Rebind instead of appending so a repeated call cannot leave labels
        # longer than mat.
        self.labels = [classNum for classNum, _ in samples]
        for i, (_, fileNameStr) in enumerate(samples):
            self.mat[i, :] = img2vector('%s/%s' % (self.rawDir, fileNameStr))