-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataReader.java
More file actions
81 lines (67 loc) · 2.17 KB
/
DataReader.java
File metadata and controls
81 lines (67 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package cs475;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
/**
 * Reads instances from a sparse, whitespace-separated text file.
 *
 * <p>Each non-blank line has the form {@code <label> <index>:<value> <index>:<value> ...}.
 * The label is parsed as an integer when the reader is in classification mode and as a
 * double otherwise. Feature indices are integers and feature values are doubles.
 *
 * <p>The reader owns the underlying {@link Scanner}; call {@link #close()} (or use
 * try-with-resources) when done.
 */
public class DataReader implements AutoCloseable {

    private final Scanner _scanner;

    // Classification or regression?
    private final boolean _classification;

    /**
     * Opens {@code filename} for reading.
     *
     * @param filename path of the data file to read
     * @param classification {@code true} to parse labels as integer class labels,
     *     {@code false} to parse them as real-valued regression targets
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public DataReader(String filename, boolean classification) throws FileNotFoundException {
        this._scanner = new Scanner(new BufferedInputStream(new FileInputStream(filename)));
        this._classification = classification;
    }

    /** Closes the underlying scanner and, with it, the input file. */
    @Override
    public void close() {
        this._scanner.close();
    }

    /**
     * Reads every remaining line of the input and converts it into an {@code Instance}.
     *
     * <p>Blank lines are skipped. Tokens may be separated by any run of whitespace, and
     * leading or trailing whitespace on a line is ignored. Features whose value is zero
     * are not stored in the feature vector.
     *
     * @return the instances in file order; empty if there are no non-blank lines left
     * @throws NumberFormatException if a classification label, a feature index, or a
     *     feature value is not a valid number
     * @throws IllegalArgumentException if a feature token has no {@code :value} part
     */
    public List<Instance> readData() {
        List<Instance> instances = new ArrayList<Instance>();
        while (this._scanner.hasNextLine()) {
            String line = this._scanner.nextLine().trim();
            if (line.isEmpty()) {
                continue;
            }

            // Split on runs of whitespace so tabs and repeated spaces do not
            // produce empty tokens; the line is already trimmed, so token 0 is
            // always the label.
            String[] tokens = line.split("\\s+");
            Label label = parseLabel(tokens[0]);
            FeatureVector featureVector = parseFeatures(tokens);
            instances.add(new Instance(featureVector, label));
        }
        return instances;
    }

    /**
     * Converts the label token of a line into a label object.
     *
     * @return the parsed label, or {@code null} when the line carries no usable label:
     *     a classification label of {@code -1}, or a regression label that is not a number
     */
    private Label parseLabel(String labelToken) {
        if (this._classification) {
            int classLabel = Integer.parseInt(labelToken);
            // -1 marks an instance without a class label.
            return (classLabel != -1) ? new ClassificationLabel(classLabel) : null;
        }
        try {
            return new RegressionLabel(Double.parseDouble(labelToken));
        } catch (NumberFormatException ignored) {
            // Deliberate best effort: a non-numeric regression label means the
            // instance is unlabeled, so it is kept with a null label.
            return null;
        }
    }

    /** Builds the feature vector from the {@code index:value} tokens after the label. */
    private FeatureVector parseFeatures(String[] tokens) {
        FeatureVector featureVector = new FeatureVector();
        for (int ii = 1; ii < tokens.length; ii++) {
            String item = tokens[ii];
            String[] parts = item.split(":");
            if (parts.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed feature '" + item + "': expected index:value");
            }
            int index = Integer.parseInt(parts[0]);
            double value = Double.parseDouble(parts[1]);
            // Keep the representation sparse: zero-valued features are not stored.
            if (value != 0) {
                featureVector.add(index, value);
            }
        }
        return featureVector;
    }
}