diff --git a/.gitignore b/.gitignore
index e9aa7e9..3ba916c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,14 @@
#Ignore
-bin/*
-testreport/*
-examples/backtothefuture/build/*
-target/*
+bin/
+testreport/
+examples/backtothefuture/build/
+
+## Maven
+target/
+
+## IntelliJ
+*.iml
+.idea
+
+## Mac OS X
+.DS_Store
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..17d470f
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,28 @@
+Copyright (c) 2010-2021 John Deverall. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/README.md b/README.md
index a4203ae..37ded66 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,11 @@ Due to the lack of useful native DOM parsers this class implements the HTMLClean
## Usage ##
In this example we will fetch the og:title and og:type contents, while ignoring any errors if this page does not comply with the Open Graph protocol standard (set in the constructor via true)
-> OpenGraph testPage = new OpenGraph("http://uk.rottentomatoes.com/m/1217700-kick_ass", true);
+> `OpenGraph testPage = new OpenGraph("http://uk.rottentomatoes.com/m/1217700-kick_ass", true);`
-> String title = testPage.getContent("title");
+> `String title = testPage.getContent("title");`
-> String type = testPage.getContent("type");
+> `String type = testPage.getContent("type");`
Another example (available in the examples/ folder) demonstrates the support for custom OpenGraph namespaces
diff --git a/examples/backtothefuture/build.xml b/examples/backtothefuture/build.xml
deleted file mode 100644
index e8d4a48..0000000
--- a/examples/backtothefuture/build.xml
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/examples/backtothefuture/src/BackToTheFuture.java b/examples/backtothefuture/src/main/java/org/opengraph/examples/BackToTheFuture.java
similarity index 73%
rename from examples/backtothefuture/src/BackToTheFuture.java
rename to examples/backtothefuture/src/main/java/org/opengraph/examples/BackToTheFuture.java
index 6bd13e7..c822b26 100644
--- a/examples/backtothefuture/src/BackToTheFuture.java
+++ b/examples/backtothefuture/src/main/java/org/opengraph/examples/BackToTheFuture.java
@@ -1,19 +1,16 @@
package org.opengraph.examples;
-import org.opengraph.OpenGraph;
import org.opengraph.MetaElement;
+import org.opengraph.OpenGraph;
public class BackToTheFuture {
- static String uri = "http://www.rottentomatoes.com/m/back_to_the_future/";
-
- public static void main(String [] args)
- {
- try
- {
+ static String uri = "https://web.archive.org/web/20110924151516/https://www.rottentomatoes.com/m/back_to_the_future";
+
+ public static void main(String[] args) {
+ try {
OpenGraph movie = new OpenGraph(uri, true);
System.out.println("Movie: " + movie.getContent("title"));
- for (MetaElement director : movie.getProperties("director"))
- {
+ for (MetaElement director : movie.getProperties("director")) {
OpenGraph extendedInfo = director.getExtendedData();
System.out.println("Directed by: " + extendedInfo.getContent("title"));
}
diff --git a/examples/pom.xml b/examples/pom.xml
new file mode 100644
index 0000000..53dc844
--- /dev/null
+++ b/examples/pom.xml
@@ -0,0 +1,55 @@
+
+ 4.0.0
+ opengraph-examples
+ ${project.parent.version}
+ jar
+
+
+ opengraph
+ opengraph
+ 0.0.2-SNAPSHOT
+
+
+
+
+ opengraph
+ opengraph-plugin
+ 0.0.2-SNAPSHOT
+
+
+
+ backtothefuture/src/main/java
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+ 3.3.0
+
+
+ package
+
+ single
+
+
+
+
+ org.opengraph.examples.BackToTheFuture
+
+
+
+ jar-with-dependencies
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/plugin/pom.xml b/plugin/pom.xml
new file mode 100644
index 0000000..b917484
--- /dev/null
+++ b/plugin/pom.xml
@@ -0,0 +1,24 @@
+
+ 4.0.0
+ opengraph-plugin
+ ${project.parent.version}
+ jar
+
+
+ opengraph
+ opengraph
+ 0.0.2-SNAPSHOT
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+
+
+
+
\ No newline at end of file
diff --git a/plugin/src/main/java/org/opengraph/MetaElement.java b/plugin/src/main/java/org/opengraph/MetaElement.java
new file mode 100644
index 0000000..d7e8916
--- /dev/null
+++ b/plugin/src/main/java/org/opengraph/MetaElement.java
@@ -0,0 +1,65 @@
+package org.opengraph;
+
+import java.net.URL;
+
+/**
+ * Represents a single OpenGraph enabled meta data element (namespace, property and content) of a specific document
+ *
+ * @author Callum Jones
+ */
+public class MetaElement {
+ private OpenGraphNamespace namespace; // either the default "og" namespace or a custom, namespace-specific one
+ private String property; // the property key
+ private String content; // the content or value of the element
+
+ /**
+ * Construct the representation of an element; the three arguments are stored as given
+ *
+ * @param namespace The namespace the element belongs to
+ * @param property The property key
+ * @param content The content or value of this element
+ */
+ public MetaElement(OpenGraphNamespace namespace, String property, String content) {
+ this.namespace = namespace;
+ this.property = property;
+ this.content = content;
+ }
+
+ /**
+ * Fetch the content (value) string of the element, exactly as passed to the constructor
+ */
+ public String getContent() {
+ return content;
+ }
+
+ /**
+ * Fetch the OpenGraph namespace the element belongs to
+ */
+ public OpenGraphNamespace getNamespace() {
+ return namespace;
+ }
+
+ /**
+ * Fetch the property key of the element
+ */
+ public String getProperty() {
+ return property;
+ }
+
+ /**
+ * Attempt to fetch the OpenGraph data of the page that this element's content points to
+ *
+ * @return An OpenGraph built from the content (spec errors ignored) if it parses as a URL; null if it is not a valid URL or building the OpenGraph fails for any reason
+ */
+ public OpenGraph getExtendedData() {
+ // Let java.net.URL decide whether the content is a well-formed URL
+ try {
+ URL url = new URL(getContent());
+
+ // parsed as a URL: build the OpenGraph for it, passing true for ignoreSpecErrors
+ return new OpenGraph(url.toString(), true);
+ } catch (Exception e) {
+ return null; // not a valid URL, or constructing the OpenGraph threw
+ }
+ }
+}
\ No newline at end of file
diff --git a/plugin/src/main/java/org/opengraph/OpenGraph.java b/plugin/src/main/java/org/opengraph/OpenGraph.java
new file mode 100644
index 0000000..766a932
--- /dev/null
+++ b/plugin/src/main/java/org/opengraph/OpenGraph.java
@@ -0,0 +1,404 @@
+package org.opengraph;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.TagNode;
+
+/**
+ * A Java object representation of an Open Graph enabled webpage.
+ * A simplified layer over a Hashtable.
+ *
+ * @author Callum Jones
+ */
+public class OpenGraph {
+ private String pageUrl; // NOTE(review): presumably the address of the represented page; not assigned in the visible code — confirm
+ private ArrayList pageNamespaces; // starts as an empty list (see the no-arg constructor)
+ private Hashtable> metaAttributes; // starts as an empty table (see the no-arg constructor)
+ private String baseType; // NOTE(review): presumably one of the BASE_TYPES keys — confirm
+ private boolean isImported; // determine if the object is a new incarnation or representation of a web page
+ private boolean hasChanged; // track if object has been changed
+
+ public final static String[] REQUIRED_META = new String[] {"title", "type", "image", "url"}; // the four basic attributes a page needs to conform to the Open Graph standard
+
+ public final static Hashtable BASE_TYPES = new Hashtable(); // filled by the static initialiser that follows
+
+ static { // fill BASE_TYPES: each key maps to an array of type names (presumably the og:type values grouped under that base type — confirm)
+ BASE_TYPES.put("activity", new String[] {"activity", "sport"});
+ BASE_TYPES.put("business", new String[] {"bar", "company", "cafe", "hotel", "restaurant"});
+ BASE_TYPES.put("group", new String[] {"cause", "sports_league", "sports_team"});
+ BASE_TYPES.put("organization", new String[] {"band", "government", "non_profit", "school", "university"});
+ BASE_TYPES.put("person",
+ new String[] {"actor", "athlete", "author", "director", "musician", "politician", "profile", "public_figure"});
+ BASE_TYPES.put("place", new String[] {"city", "country", "landmark", "state_province"});
+ BASE_TYPES
+ .put("product", new String[] {"album", "book", "drink", "food", "game", "movie", "product", "song", "tv_show"});
+ BASE_TYPES.put("website", new String[] {"blog", "website", "article"});
+ }
+
+ /**
+ * Create an empty open graph representation for generating your own Open Graph object (no web page is fetched)
+ */
+ public OpenGraph() {
+ pageNamespaces = new ArrayList();
+ metaAttributes = new Hashtable>();
+ hasChanged = false; // a freshly created object has no modifications yet
+ isImported = false; // not built from a web page; the URL-based constructor sets this to true
+ }
+
+ /**
+ * Fetch the open graph representation from a web site
+ *
+ * @param url The address to the web page to fetch Open Graph data
+ * @param ignoreSpecErrors Set this option to true if you don't wish to have an exception thrown if the page does not conform to the basic 4 attributes
+ * @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception
+ * @throws java.lang.Exception A generic exception is thrown if the specific page fails to conform to the basic Open Graph standard as defined by the constant REQUIRED_META
+ */
+ public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOException, Exception {
+ this();
+ isImported = true;
+
+
+ // download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content
+ System.out.println(url);
+ URL pageURL = new URL(url);
+ HttpURLConnection siteConnection = (HttpURLConnection) pageURL.openConnection();
+ siteConnection.connect();
+ if(siteConnection.getHeaderField("Location") != null )
+ {
+ String redirect = siteConnection.getHeaderField("Location");
+ siteConnection.disconnect();
+ pageURL = new URL(redirect);
+ siteConnection = (HttpURLConnection) pageURL.openConnection();
+ siteConnection.connect();
+ }
+ Charset charset = getConnectionCharset(siteConnection);
+ BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset));
+ String inputLine;
+ StringBuffer headContents = new StringBuffer();
+
+ // Loop through each line, looking for the closing head element
+ while ((inputLine = dis.readLine()) != null) {
+ if (inputLine.contains("")) {
+ inputLine = inputLine.substring(0, inputLine.indexOf("") + 7);
+ inputLine = inputLine.concat("