From ff1f944f452a5a4db2fedfdb2d6508f25eae0a3e Mon Sep 17 00:00:00 2001 From: pcanof Date: Mon, 26 Apr 2021 13:00:31 -0600 Subject: [PATCH 1/4] Add MovieRecommender class --- pom.xml | 15 ++++++++++++++- .../recommendation/MovieRecommenderTest.java | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 8169ff7..c9bd5dd 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,20 @@ nearsoft.academy big-data 1.0-SNAPSHOT - jar + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 16 + 16 + + + + + jar big-data http://maven.apache.org diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..06ea7d6 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); From a94cf0837e0232883914258bd06dd34c7caf7e56 Mon Sep 17 00:00:00 2001 From: pcanof Date: Mon, 26 Apr 2021 13:03:07 -0600 Subject: [PATCH 2/4] Create MovieRecommender.java Create Movie Recommender class --- .../recommendation/MovieRecommender.java | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..c55b8c7 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,130 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.List; +import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.TaggedIOException; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + Hashtable users; + Hashtable products; + Hashtable productById; + int totalUsers; + int totalProducts; + int totalReviews; + FileReader file; + + public MovieRecommender(String pathFile) throws IOException, FileNotFoundException{ + this.totalUsers = 0; + this.totalProducts = 0; + this.totalReviews = 0; + this.file = new FileReader(pathFile); + this.users = new Hashtable(); + this.products = new Hashtable(); + this.productById = new Hashtable(); + + processFile(); + } + + public void processFile() throws IOException, FileNotFoundException { + + String userId = ""; + String productId = ""; + String reviewId = ""; + + FileWriter csvfile = new FileWriter("data.csv"); + //Scanner readerFile = new Scanner(this.file); + BufferedReader br = new BufferedReader(this.file); + + Pattern usersRegex = Pattern.compile("review\\/userId: ([\\D\\d]+)"); + Pattern reviewsRegex = Pattern.compile("review\\/score: ([0-9]+).([0-9]+)"); + Pattern productsRegex = Pattern.compile("product\\/productId: ([\\D\\d]+)"); + + Matcher matcherProduct, matcherUser, matcherReview; + String Line; + //while (readerFile.hasNextLine()){ + while ((Line = br.readLine()) != null){ + //String Line = readerFile.nextLine(); + matcherProduct = productsRegex.matcher(Line); + matcherUser = usersRegex.matcher(Line); + matcherReview = reviewsRegex.matcher(Line); + + if(matcherUser.matches()) { + userId = Line.split(" ")[1]; + + if (users.get(userId) == null) { + this.totalUsers ++; + users.put(userId, this.totalUsers); + } + } + + if (matcherReview.matches()) { + reviewId = Line.split(" ")[1]; + this.totalReviews++; + } + + if (matcherProduct.matches()) { + productId = Line.split(" ")[1]; + + if (products.get(productId) == null) { + this.totalProducts++; + products.put(productId, this.totalProducts); + productById.put(this.totalProducts, productId); + } + + } + if (!userId.equals("") && !reviewId.equals("") && !productId.equals("")){ + csvfile.write(users.get(userId) + "," + products.get(productId) + "," + reviewId + "\n"); + userId = ""; + productId = ""; + reviewId = ""; + } + + } + + //503readerFile.close(); + csvfile.close(); + } + + public int getTotalReviews(){ + return this.totalReviews; + } + public int getTotalProducts(){ + return this.totalProducts; + } + public int getTotalUsers(){ + return this.totalUsers; + } + + public List getRecommendationsForUser(String userID) throws IOException, TasteException { + + DataModel model = new FileDataModel(new File("data.csv")); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + List recommendations = new ArrayList(); + + for (RecommendedItem recommendation : recommender.recommend(users.get(userID), 3)) { + recommendations.add(productById.get((int)(recommendation.getItemID()))); + } + + return recommendations; + } + +} From ab64b467dfa73eb3401df41691e45934ef3d6c8c Mon Sep 17 00:00:00 2001 From: pcanof Date: Mon, 26 Apr 2021 14:04:01 -0600 Subject: [PATCH 3/4] Update MovieRecommender.java Add comments and some modifications in the variables names --- .../recommendation/MovieRecommender.java | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java index c55b8c7..30213ad 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -4,11 +4,9 @@ import java.util.ArrayList; import java.util.Hashtable; import java.util.List; -import java.util.Scanner; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.io.TaggedIOException; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; @@ -21,6 +19,7 @@ import org.apache.mahout.cf.taste.similarity.UserSimilarity; public class MovieRecommender { + //Initialize variables Hashtable users; Hashtable products; Hashtable productById; @@ -29,37 +28,40 @@ public class MovieRecommender { int totalReviews; FileReader file; - public MovieRecommender(String pathFile) throws IOException, FileNotFoundException{ + //Constructor + public MovieRecommender(String pathFile) throws IOException { this.totalUsers = 0; this.totalProducts = 0; this.totalReviews = 0; this.file = new FileReader(pathFile); - this.users = new Hashtable(); - this.products = new Hashtable(); - this.productById = new Hashtable(); + this.users = new Hashtable<>(); + this.products = new Hashtable<>(); + this.productById = new Hashtable<>(); processFile(); } - public void processFile() throws IOException, FileNotFoundException { + public void processFile() throws IOException { String userId = ""; String productId = ""; String reviewId = ""; - FileWriter csvfile = new FileWriter("data.csv"); - //Scanner readerFile = new Scanner(this.file); + FileWriter csvFile = new FileWriter("data.csv"); BufferedReader br = new BufferedReader(this.file); + //Patterns for search the respective data with Regular Expressions Pattern usersRegex = Pattern.compile("review\\/userId: ([\\D\\d]+)"); Pattern reviewsRegex = Pattern.compile("review\\/score: ([0-9]+).([0-9]+)"); Pattern productsRegex = Pattern.compile("product\\/productId: ([\\D\\d]+)"); + //Initialize a Matcher object for the conditions below Matcher matcherProduct, matcherUser, matcherReview; + //Initialize a string value for the line in the text file String Line; - //while (readerFile.hasNextLine()){ + while ((Line = br.readLine()) != null){ - //String Line = readerFile.nextLine(); + matcherProduct = productsRegex.matcher(Line); matcherUser = usersRegex.matcher(Line); matcherReview = reviewsRegex.matcher(Line); @@ -88,8 +90,11 @@ public void processFile() throws IOException, FileNotFoundException { } } + + // This condition is for write the csv file when the userid, reviewId and product id has a value then + // we reassign the variables to null values if (!userId.equals("") && !reviewId.equals("") && !productId.equals("")){ - csvfile.write(users.get(userId) + "," + products.get(productId) + "," + reviewId + "\n"); + csvFile.write(users.get(userId) + "," + products.get(productId) + "," + reviewId + "\n"); userId = ""; productId = ""; reviewId = ""; @@ -97,8 +102,7 @@ public void processFile() throws IOException, FileNotFoundException { } - //503readerFile.close(); - csvfile.close(); + csvFile.close(); } public int getTotalReviews(){ @@ -118,8 +122,10 @@ public List getRecommendationsForUser(String userID) throws IOException, UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); - List recommendations = new ArrayList(); + List recommendations = new ArrayList<>(); + //For the 3 items in recommender.recommend(...) we add the values in the index from the hashtable productById in + // the recommendations list for (RecommendedItem recommendation : recommender.recommend(users.get(userID), 3)) { recommendations.add(productById.get((int)(recommendation.getItemID()))); } From b8dab38b6c9c319c2f5d943933bb19008a2b3a36 Mon Sep 17 00:00:00 2001 From: pcanof <37887903+pcanof@users.noreply.github.com> Date: Mon, 26 Apr 2021 14:22:52 -0600 Subject: [PATCH 4/4] Add comment for use a unzip file Only use a text file --- .../academy/bigdata/recommendation/MovieRecommenderTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 06ea7d6..604f663 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,6 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html + //Use a unzip file MovieRecommender recommender = new MovieRecommender("movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts());