Skip to content

Commit d3b775d

Browse files
committed
Cosmetic corrections
1 parent 78b1a5e commit d3b775d

File tree

5 files changed

+43
-2
lines changed

5 files changed

+43
-2
lines changed

nbactions.xml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<actions>
3+
<action>
4+
<actionName>run</actionName>
5+
<packagings>
6+
<packaging>jar</packaging>
7+
</packagings>
8+
<goals>
9+
<goal>process-classes</goal>
10+
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
11+
</goals>
12+
<properties>
13+
<exec.args>-classpath %classpath info.debatty.java.stringsimilarity.Main</exec.args>
14+
<exec.executable>java</exec.executable>
15+
</properties>
16+
</action>
17+
</actions>

src/main/java/info/debatty/java/stringsimilarity/CharacterSubstitutionInterface.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
* For example, in an OCR application, cost('o', 'a') could be 0.4
3232
* In a checkspelling application, cost('u', 'i') could be 0.4 because these are
3333
* next to each other on the keyboard...
34+
*
3435
* @author Thibault Debatty
3536
*/
3637
public interface CharacterSubstitutionInterface {

src/main/java/info/debatty/java/stringsimilarity/Cosine.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,15 @@ public Cosine() {
7474
* @return Cosine similarity
7575
*/
7676
public double similarity(String s1, String s2) {
77+
if (s1.equals(s2)) {
78+
return 1.0;
79+
}
80+
81+
82+
if (s1.equals("") || s2.equals("")) {
83+
return 0.0;
84+
}
85+
7786
KShingling ks = new KShingling(this.k);
7887
ks.parse(s1);
7988
ks.parse(s2);

src/main/java/info/debatty/java/stringsimilarity/Damerau.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,14 +44,25 @@ public class Damerau implements StringSimilarityInterface {
4444
public static void main(String[] args) {
4545

4646
Damerau d = new Damerau();
47+
// 1 switch
4748
System.out.println(d.absoluteDistance("ABCDEF", "ABDCEF"));
49+
50+
// 2 switches
4851
System.out.println(d.absoluteDistance("ABCDEF", "BACDFE"));
52+
53+
// 1 deletion
4954
System.out.println(d.absoluteDistance("ABCDEF", "ABCDE"));
55+
56+
// 1 deletion
5057
System.out.println(d.absoluteDistance("ABCDEF", "BCDEF"));
5158
System.out.println(d.absoluteDistance("ABCDEF", "ABCGDEF"));
5259
System.out.println(d.absoluteDistance("ABCDEF", "BCDAEF"));
5360

5461
System.out.println(d.distance("ABCDEF", "GHABCDE"));
62+
63+
// All different
64+
System.out.println(d.absoluteDistance("ABCDEF", "POIU"));
65+
System.out.println(d.similarity("ABCDEF", "POIU"));
5566
}
5667

5768
public int absoluteDistance(String s1, String s2) {
@@ -89,7 +100,6 @@ public int absoluteDistance(String s1, String s2) {
89100

90101
}
91102

92-
93103
// fill in the distance matrix H
94104
// look at each character in s1
95105
for (int i = 1; i <= s1.length(); i++) {
@@ -124,7 +134,7 @@ public double similarity(String s1, String s2) {
124134
}
125135

126136
public double distance(String s1, String s2) {
127-
return (double) absoluteDistance(s1, s2) / (s1.length() + s2.length());
137+
return (double) absoluteDistance(s1, s2) / Math.max(s1.length(), s2.length());
128138
}
129139

130140
protected static int min(int a, int b, int c, int d) {

src/main/java/info/debatty/java/stringsimilarity/Main.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,11 +37,15 @@ public class Main {
3737
*/
3838
public static void main(String[] args) {
3939
ArrayList<StringSimilarityInterface> similarities = new ArrayList<StringSimilarityInterface>();
40+
similarities.add(new Cosine(4));
41+
similarities.add(new Damerau());
42+
similarities.add(new Jaccard(4));
4043
similarities.add(new JaroWinkler());
4144
similarities.add(new Levenshtein());
4245
similarities.add(new LongestCommonSubsequence());
4346
similarities.add(new NGram(4));
4447
similarities.add(new QGram(4));
48+
similarities.add(new SorensenDice(4));
4549

4650
ArrayList<Pair> pairs = new ArrayList<Pair>();
4751
// adjacent letters switch

0 commit comments

Comments
 (0)