diff --git a/pom.xml b/pom.xml
index 8169ff7..7826f26 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,11 +9,23 @@
big-data
http://maven.apache.org
-
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+ 1.8
+ 1.8
+
+
+
+
UTF-8
+ 1.8
+ 1.8
-
org.apache.mahout
diff --git a/src/data/movies.txt.gz b/src/data/movies.txt.gz
new file mode 100644
index 0000000..020e8a6
Binary files /dev/null and b/src/data/movies.txt.gz differ
diff --git a/src/main/java/movierec/MovieRecommender.java b/src/main/java/movierec/MovieRecommender.java
new file mode 100644
index 0000000..260eb45
--- /dev/null
+++ b/src/main/java/movierec/MovieRecommender.java
@@ -0,0 +1,125 @@
+// This class takes the gzip-compressed file src/data/movies.txt.gz as input.
+// The copy of src/data/movies.txt.gz currently in the repository is a small placeholder, kept for storage reasons;
+// for the test to pass, it must be replaced with the original 3+ GB file.
+// Given the full file, this class generates an intermediate CSV file of cleaned data
+// that is about 150 MB in size.
+
+package movierec;
+
+import java.io.*;
+import java.util.zip.GZIPInputStream;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+public class MovieRecommender {
+ int totalProducts;
+ int totalUsers;
+ int totalReviews;
+ // keep hash tables to be able to 'translate' between numeric and alphanum. IDs
+ Hashtable products;
+ Hashtable users;
+ Hashtable productsByID;
+ String myPath;
+ String output;
+
+ public MovieRecommender(String pathTidyFile) throws Exception {
+ this.totalProducts = 0;
+ this.totalUsers = 0;
+ this.totalReviews = 0;
+ this.products = new Hashtable();
+ this.users = new Hashtable();
+ this.productsByID = new Hashtable();
+ this.myPath = pathTidyFile;
+ this.output = "src/data/nice_data.csv";
+ dataWrangling();
+ }
+
+ // method for cleaning and re-arranging the data so that it is a valid input
+ // for the mahout FileDataModel class
+ public void dataWrangling () throws IOException {
+ InputStream gzipStream = new GZIPInputStream(new FileInputStream(this.myPath));
+ BufferedReader buffered = new BufferedReader(new InputStreamReader(gzipStream));
+ FileWriter csvWriter = new FileWriter(this.output);
+
+ String auxLine = "";
+ String productStr = "product/productId: ";
+ String userStr = "review/userId: ";
+ String scoreStr = "review/score: ";
+ String user = "";
+ String product = "";
+ String score = "";
+
+ while (auxLine != null) {
+ if (auxLine.contains(userStr)) {
+ user = auxLine.split(" ")[1];
+ if(this.users.get(user) == null ) {
+ this.totalUsers ++;
+ this.users.put(user,this.totalUsers);
+ }
+ }
+ if (auxLine.contains(productStr)) {
+ product = auxLine.split(" ")[1];
+ if(this.products.get(product) == null ) {
+ this.totalProducts ++;
+ this.products.put(product,this.totalProducts);
+ this.productsByID.put(this.totalProducts,product);
+ }
+ }
+ if (auxLine.contains(scoreStr)) {
+ score = auxLine.split(" ")[1];
+ this.totalReviews ++;
+ }
+ if (user != "" && product != "" && score != "") {
+ csvWriter.write(this.users.get(user) + "," + this.products.get(product) + "," + score + "\n");
+ user = "";
+ product = "";
+ score = "";
+ }
+ auxLine = buffered.readLine();
+ }
+
+ buffered.close();
+ csvWriter.close();
+
+ }
+
+ // this method gets 3 item recommendations as output for a given user as input
+ public List getRecommendationsForUser(String userID) throws IOException, TasteException{
+ DataModel model = new FileDataModel(new File(this.output));
+ UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+ UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+ UserBasedRecommender recommender =
+ new GenericUserBasedRecommender(model, neighborhood, similarity);
+ List recommendations = new ArrayList();
+ for (RecommendedItem recommendation : recommender.recommend(this.users.get(userID), 3)) {
+ recommendations.add(this.productsByID.get((int )(recommendation.getItemID())));
+ }
+ return recommendations;
+
+ }
+
+ public int getTotalReviews() {
+ return totalReviews;
+ }
+
+ public int getTotalProducts() {
+ return totalProducts;
+ }
+
+ public int getTotalUsers() {
+ return totalUsers;
+ }
+}
+
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..8fb09e3 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -1,22 +1,24 @@
+// PLEASE NOTE:
+// for the test to pass,
+// the current src/data/movies.txt.gz file needs to be replaced with the original 3+ GB file
package nearsoft.academy.bigdata.recommendation;
-import org.apache.mahout.cf.taste.common.TasteException;
+import movierec.MovieRecommender;
import org.junit.Test;
-
-import java.io.IOException;
import java.util.List;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.hasItem;
+
public class MovieRecommenderTest {
@Test
- public void testDataInfo() throws IOException, TasteException {
+ public void testDataInfo() throws Exception {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
- assertEquals(7911684, recommender.getTotalReviews());
+
+ MovieRecommender recommender = new MovieRecommender("src/data/movies.txt.gz");
+ assertEquals(7911684, recommender.getTotalReviews()); // atributos clase principal
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
@@ -24,7 +26,5 @@ public void testDataInfo() throws IOException, TasteException {
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));
-
}
-
-}
+}
\ No newline at end of file