-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathWordStatIndex.java
More file actions
63 lines (58 loc) · 2.19 KB
/
WordStatIndex.java
File metadata and controls
63 lines (58 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.List;
import java.util.Comparator;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.io.*;
/**
 * Builds a positional index of the words found in a text file.
 *
 * <p>A word is a maximal run of characters accepted by {@link #isWordElement(char)};
 * words are lower-cased before being counted. Every word occurrence in the input is
 * numbered consecutively starting from 1. For each distinct word, in order of first
 * appearance, one output line is written:
 *
 * <pre>word count position1 position2 ...</pre>
 *
 * Both the input and the output file are treated as UTF-8.
 */
public class WordStatIndex {
    /**
     * Tells whether {@code c} may be part of a word: a letter, an apostrophe,
     * or any character of the Unicode "dash punctuation" category.
     */
    private static boolean isWordElement(char c) {
        return Character.isLetter(c)
                || c == '\''
                || Character.getType(c) == Character.DASH_PUNCTUATION;
    }

    /**
     * Extracts every word from {@code line} and records its ordinal in {@code index}.
     *
     * @param line      text to scan
     * @param nextIndex ordinal to assign to the first word found in {@code line}
     * @param index     word to list-of-ordinals map, updated in place
     * @return the ordinal to assign to the next word after this line
     */
    private static int indexLine(String line, int nextIndex, Map<String, List<Integer>> index) {
        int i = 0;
        while (i < line.length()) {
            // Skip separators preceding the next word.
            while (i < line.length() && !isWordElement(line.charAt(i))) {
                i++;
            }
            int wordStart = i;
            while (i < line.length() && isWordElement(line.charAt(i))) {
                i++;
            }
            // An empty span can only happen when trailing separators reached the end of the line.
            if (i > wordStart) {
                String word = line.substring(wordStart, i).toLowerCase();
                index.computeIfAbsent(word, key -> new ArrayList<>()).add(nextIndex);
                nextIndex++;
            }
        }
        return nextIndex;
    }

    /**
     * Reads the file named by {@code args[0]}, indexes its words and writes the
     * result to the file named by {@code args[1]}.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        // LinkedHashMap keeps words in order of first appearance.
        Map<String, List<Integer>> index = new LinkedHashMap<>();

        // The input is fully consumed and closed before the output file is created.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8))) {
            int nextIndex = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                nextIndex = indexLine(line, nextIndex, index);
            }
        }

        // BufferedWriter (unlike PrintWriter) propagates write failures as IOException.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(args[1]), StandardCharsets.UTF_8))) {
            for (Map.Entry<String, List<Integer>> entry : index.entrySet()) {
                List<Integer> positions = entry.getValue();
                StringBuilder row = new StringBuilder(entry.getKey());
                row.append(' ').append(positions.size());
                for (int position : positions) {
                    row.append(' ').append(position);
                }
                writer.write(row.toString());
                writer.newLine();
            }
        }
    }
}