-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDomainWebSearch.java
More file actions
92 lines (77 loc) · 1.92 KB
/
DomainWebSearch.java
File metadata and controls
92 lines (77 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
Domain Search for the WebSpider, for SPLaT.
Written by Shafik Amin, 10-06-2003
*/
/** Imports **/
import java.util.*;
import java.net.*;
import java.io.*;
import java.util.regex.*;
/**
 * A WebSearch that only writes and explores URLs located on the same host as
 * the start website.
 *
 * The host is extracted from the value returned by getWebsite() every time
 * setWebsite(String) is called. Candidate URLs are parsed and their host is
 * compared (case-insensitively) with that host; a plain substring test is
 * deliberately NOT used, because it would also accept foreign URLs that merely
 * mention the domain, such as http://evil.com/?u=http://www.arizona.edu or
 * http://www.arizona.edu.evil.com/.
 */
public class DomainWebSearch extends WebSearch
{
    /** Instance fields **/

    /*
     * Scheme, host and (if explicit) port of the start website,
     * eg http://www.arizona.edu. Kept for display; null until setWebsite()
     * has been called successfully.
     */
    private String myDomain;

    /*
     * Host part of the start website, eg www.arizona.edu. This is what the
     * filters compare against; null until setWebsite() has been called.
     */
    private String myHost;

    /** Constructors **/
    public DomainWebSearch()
    {
        super();
    }

    /**
     * Overridden: stores the website through the superclass and then derives
     * the domain from it.
     *
     * @param url the start website, passed unchanged to WebSearch.setWebsite
     * @throws IllegalArgumentException if the stored website is missing, is
     *         not a parseable URL, or has no host
     */
    public void setWebsite(String url)
    {
        super.setWebsite(url);
        setDomainName();
    }

    /**
     * Overrides the "WebSearch" behavior: a URL is written only if the
     * superclass accepts it AND it is on the start website's host.
     *
     * @param url candidate URL
     * @return true if the superclass check passes and the host matches
     */
    public boolean shouldWriteToDisk(String url)
    {
        if (!super.shouldWriteToDisk(url)) return false;
        /* otherwise, filter potential write */
        return isInDomain(url);
    }

    /**
     * Overrides the "WebSearch" behavior: a URL is explored only if the
     * superclass accepts it AND it is on the start website's host.
     *
     * @param url candidate URL
     * @return true if the superclass check passes and the host matches
     */
    public boolean shouldExplore(String url)
    {
        if (!super.shouldExplore(url)) return false;
        /* otherwise, filter potential explores */
        return isInDomain(url);
    }

    /*
     * Shared filter for both overrides. Returns false (rather than throwing)
     * when no domain has been set yet, when url is null, or when url cannot
     * be parsed as an absolute URL with a host.
     */
    private boolean isInDomain(String url)
    {
        if (url == null || myHost == null) return false;
        String host = hostOf(url);
        /* host names are case-insensitive */
        return host != null && host.equalsIgnoreCase(myHost);
    }

    /* Returns the host of an absolute URL, or null if there is none. */
    private static String hostOf(String url)
    {
        try
        {
            String host = new URL(url).getHost();
            return (host == null || host.length() == 0) ? null : host;
        }
        catch (MalformedURLException e)
        {
            /* relative links, unknown schemes etc. are simply "not in domain" */
            return null;
        }
    }

    /*
     * Sets the domain name to *this*'s domain name, keeping the website's own
     * scheme (http, https, ...) instead of assuming http.
     */
    private void setDomainName()
    {
        String site = getWebsite();
        if (site == null)
        {
            throw new IllegalArgumentException("No website set; cannot derive a domain");
        }

        URL parsed;
        try
        {
            parsed = new URL(site);
        }
        catch (MalformedURLException e)
        {
            throw new IllegalArgumentException("Not a valid absolute URL: " + site, e);
        }

        String host = parsed.getHost();
        if (host == null || host.length() == 0)
        {
            throw new IllegalArgumentException("URL has no host: " + site);
        }

        myHost = host;
        myDomain = parsed.getProtocol() + "://" + host;
        if (parsed.getPort() != -1)
        {
            /* getPort() is -1 when the URL carries no explicit port */
            myDomain = myDomain + ":" + parsed.getPort();
        }
    }

    /* description tag */
    public static String description()
    {
        return "Limits search to within the same domain name";
    }

    /** Test Driver **/
    public static void main(String[] args)
    {
        String toTest = "http://www.cs.arizona.edu/people/collberg";
        int number = 100;
        int seconds = 3600;
        File dl = new File("Test");
        dl.mkdirs();
        try
        {
            DomainWebSearch w = new DomainWebSearch();
            w.setWebsite(toTest);
            w.setDownloads(number);
            System.out.println(w.myDomain + " is the domain: ");
            w.setDepth(4);
            w.setTime(seconds); // # of seconds
            w.setDownloadLocation(dl);
            w.startSearch();
            System.out.println("Search ended!!");
        }
        catch (Exception e)
        {
            /* top-level driver: report whatever went wrong and exit */
            System.out.println(e);
        }
    }
}