-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathRMM.java
More file actions
113 lines (107 loc) · 3.56 KB
/
RMM.java
File metadata and controls
113 lines (107 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package nlp;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Reverse maximum matching (RMM) word segmenter.
 *
 * <p>The input is scanned from its end towards its start. At each position the
 * longest candidate (up to the configured maximum word length) that is present
 * in the dictionary is cut off as a word; if no candidate matches, a single
 * character is cut off instead.
 *
 * <p>The dictionary is read from the file {@code dict.txt} in the working
 * directory when the instance is created.
 *
 * <p>Instances keep the segments of the current run in a field, so a single
 * instance must not be used from several threads at the same time.
 *
 * @author quincy1994
 */
public class RMM {

    /** Maximum number of characters tried for a single word. */
    private final int totalMaxlen;

    /** Words known to the segmenter. Never {@code null}. */
    private final Set<String> dictionary;

    /** Segments of the current run, in right-to-left (discovery) order. */
    private final List<String> segments = new ArrayList<String>();

    /**
     * Creates a segmenter with the given maximum word length.
     *
     * <p>If the dictionary file cannot be read, the failure is logged and the
     * segmenter falls back to an empty dictionary, which makes every character
     * its own segment.
     *
     * @param maxLen maximum word length in characters, at least 1
     * @throws IllegalArgumentException if {@code maxLen} is smaller than 1
     */
    public RMM(int maxLen) {
        if (maxLen < 1) {
            throw new IllegalArgumentException("maxLen must be >= 1, got " + maxLen);
        }
        this.totalMaxlen = maxLen;

        Set<String> loaded;
        try {
            loaded = loadFile();
        } catch (IOException ex) {
            Logger.getLogger(RMM.class.getName()).log(Level.SEVERE,
                    "Failed to load dictionary; falling back to an empty dictionary", ex);
            loaded = null;
        }
        // Never leave the dictionary null: a failed load would otherwise
        // surface later as a NullPointerException in the first segmentation.
        this.dictionary = (loaded != null) ? loaded : new HashSet<String>();
    }

    /** Creates a segmenter with a maximum word length of 3. */
    public RMM() {
        this(3);
    }

    /**
     * Loads the dictionary from {@code dict.txt}.
     *
     * <p>Each line is split on {@code ","} and the first field is taken as the
     * word. Lines whose first field is empty are skipped.
     *
     * @return the set of dictionary words
     * @throws IOException if the file cannot be opened or read
     */
    public Set<String> loadFile() throws IOException {
        Set<String> words = new HashSet<String>();
        String filename = "dict.txt";
        // NOTE(review): FileReader decodes with the platform default charset.
        // Confirm the encoding of dict.txt and pin it explicitly if it is known.
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                String word = line.split(",")[0];
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        }
        return words;
    }

    /**
     * Segments {@code source} and returns the words in their original
     * left-to-right order, each one followed by {@code "/ "}.
     *
     * <p>Every call starts from a clean state, so the same instance can be
     * reused for any number of inputs.
     *
     * @param source text to segment
     * @return the segmented text, or an empty string for empty input
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public String RMMSegment(String source) {
        segments.clear();
        rmm(source, totalMaxlen, source.length());

        // Segments were collected right-to-left; emit them in reading order.
        StringBuilder result = new StringBuilder();
        for (int i = segments.size() - 1; i >= 0; i--) {
            result.append(segments.get(i)).append("/ ");
        }
        return result.toString();
    }

    /**
     * Returns the substring of {@code source} that ends (exclusively) at
     * {@code m_nPosIndex} and is at most {@code len} characters long. The start
     * is clamped to 0 when fewer than {@code len} characters are available.
     *
     * @param source      text to cut from
     * @param m_nPosIndex exclusive end index of the substring
     * @param len         requested number of characters
     * @return the (possibly shorter) substring
     * @throws StringIndexOutOfBoundsException if {@code m_nPosIndex} lies
     *         outside {@code source} or {@code len} is negative
     */
    public String getSubString(String source, int m_nPosIndex, int len) {
        int startIndex = Math.max(0, m_nPosIndex - len);
        return source.substring(startIndex, m_nPosIndex);
    }

    /**
     * Runs reverse maximum matching over {@code source}, starting at
     * {@code frompos} and moving towards index 0, and appends every segment
     * found to this instance's internal segment list.
     *
     * <p>This is the worker behind {@link #RMMSegment(String)}. It is
     * implemented as a loop so that long inputs cannot exhaust the call stack.
     *
     * @param source  text to segment
     * @param len     window size for the first match attempt; values below 1
     *                are treated as 1. After each emitted segment the window is
     *                reset to the configured maximum word length.
     * @param frompos exclusive end index at which scanning starts
     * @throws StringIndexOutOfBoundsException if {@code frompos} is greater
     *         than {@code source.length()}
     */
    public void rmm(String source, int len, int frompos) {
        int pos = frompos;
        int window = Math.max(1, len);
        while (pos > 0) {
            // Never try a window longer than the text that is left; the
            // clamped substring would be the same for every larger window.
            window = Math.min(window, pos);
            String sub = getSubString(source, pos, window);
            if (window == 1 || dictionary.contains(sub)) {
                // Either a dictionary hit, or a single character that is
                // emitted as-is because nothing longer matched.
                segments.add(sub);
                pos -= window;
                window = totalMaxlen;
            } else {
                // No match: retry at the same position with a shorter window.
                window--;
            }
        }
    }

    public static void main(String[] args) {
        RMM myRMM = new RMM();
        String source = "记录最佳前候选词列表";
        String result = myRMM.RMMSegment(source);
        System.out.println(result);
    }
}