From d0f686d75267435b57af1a7221d7b8feda0d4c72 Mon Sep 17 00:00:00 2001 From: Edwin Fajardo Date: Thu, 21 Oct 2021 13:19:15 -0500 Subject: [PATCH 1/4] Added recommender class to pass tests and slf4j dependencies --- pom.xml | 12 + .../nearsoft/academy/MovieRecommender.java | 208 ++++++++++++++++++ .../recommendation/MovieRecommenderTest.java | 10 +- 3 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 src/main/java/nearsoft/academy/MovieRecommender.java diff --git a/pom.xml b/pom.xml index 8169ff7..9b4ec34 100644 --- a/pom.xml +++ b/pom.xml @@ -12,6 +12,8 @@ UTF-8 + 1.8 + 1.8 @@ -26,5 +28,15 @@ 4.7 test + + org.slf4j + slf4j-api + 1.7.32 + + + org.slf4j + slf4j-simple + 1.7.5 + diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java new file mode 100644 index 0000000..0c2db02 --- /dev/null +++ b/src/main/java/nearsoft/academy/MovieRecommender.java @@ -0,0 +1,208 @@ +package nearsoft.academy; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + + +public class MovieRecommender { + private static String ROOT_PATH = System.getProperty("user.dir"); + private static String CSV_DATA_PATH = ROOT_PATH + "/src/main/resources/data.csv"; + // private static String USER_INDEX = ROOT_PATH + "/src/main/resources/users.csv"; + // private static String PRODUCT_INDEX = ROOT_PATH + "/src/main/resources/products.csv"; + + private String dataFilePath; + private Map products = new HashMap(); + private Map productsReverse = new HashMap(); + private Map users = new HashMap(); + private int totalUsers = 0; + private int totalProducts = 0; + private int totalReviews = 0; + + // private ArrayList users = new ArrayList(); + + public MovieRecommender(String dataFilePath) { + this.dataFilePath = dataFilePath; + + File processedData = new File(CSV_DATA_PATH); + + // if (!processedData.exists()) { + try { + if (processedData.exists()) { processedData.delete(); } + processFile(); + } catch (IOException e) { + e.printStackTrace(); + } + // } else { + // analizeData(); + // } + + } + + private void processFile () throws IOException { + System.out.println("Processing data...\n"); + long start1 = System.nanoTime(); + + FileInputStream file = new FileInputStream(this.dataFilePath); + GZIPInputStream gzip = new GZIPInputStream(file); + InputStreamReader isr = new InputStreamReader(gzip); + BufferedReader br = new BufferedReader(isr); + + List dataParts = new ArrayList(); + String line; + + FileWriter fileWriter = new FileWriter(CSV_DATA_PATH); + + while ((line = br.readLine()) != null) { + String[] elements = {"product/productId:", "review/userId:", "review/score:"}; + + String[] parts = line.split(" "); + + Boolean idRequiredField = Arrays.asList(elements).contains(parts[0]); + + if (idRequiredField) { + dataParts.add(parts[1]); + } + + if (dataParts.size() == 3) { + String userId = dataParts.get(1); + String productId = dataParts.get(0); + String score = dataParts.get(2); + + int productIdxVal = (this.products.containsKey(productId) ? this.products.get(productId) : totalProducts); + int userIdxVal = (this.users.containsKey(userId) ? this.users.get(userId) : totalUsers); + + String mixedData = userIdxVal + "," + productIdxVal + "," + score + "\n"; + fileWriter.write(mixedData); + + this.setOccurrences(productId, userId); + + dataParts = new ArrayList(); + } + } + + fileWriter.close(); + long end1 = System.nanoTime(); + System.out.println("Elapsed Time in seconds: " + ((end1 - start1) * 0.000000001)); + } + + // private void analizeData() { + // System.out.println("Analyzing data...\n"); + // try { + // FileReader fileReader = new FileReader(new File(CSV_DATA_PATH)); + // BufferedReader br = new BufferedReader(fileReader); + // String line = ""; + + // while ((line = br.readLine()) != null) { + // this.setOccurrences(line); + // } + + // } catch (IOException e) { + // e.printStackTrace(); + // } + // } + + private void setOccurrences (String productId, String userId) { + // Count reviews + this.totalReviews++; + + // Count products + if (!this.products.containsKey(productId)) { + this.products.put(productId, totalProducts); + this.productsReverse.put(totalProducts, productId); + this.totalProducts++; + } + + // Count users + if (!this.users.containsKey(userId)) { + this.users.put(userId, totalUsers); + this.totalUsers++; + } + } + + public int getTotalReviews() { + return this.totalReviews; + } + + public int getTotalProducts () { + return this.totalProducts; + } + + public int getTotalUsers () { + return this.totalUsers; + } + + public List getRecommendationsForUser (String userID) throws TasteException { + List recommendations = new ArrayList(); + + try { + DataModel model = new FileDataModel(new File(CSV_DATA_PATH)); + + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + long user = users.get(userID); + + System.out.println("User: " + userID + ": " + user); + + List recommendationss = recommender.recommend(user, 3); + + System.out.println("Number of recomendations: " + recommendations.size()); + + for (RecommendedItem recommendation : recommendationss) { + int idOfProduct = (int) recommendation.getItemID(); + recommendations.add(productsReverse.get(idOfProduct)); + } + + } catch (IOException e) { + e.printStackTrace(); + } + + return recommendations; + } + + // public static void main(String[] args) throws IOException, TasteException { + // String ROOT_PATH = System.getProperty("user.dir"); + + // MovieRecommender mr = new MovieRecommender(ROOT_PATH + "/src/main/resources/movies.txt.gz"); + + // System.out.println("Total reviews: " + mr.getTotalReviews() + "\n"); + // System.out.println("Total users: " + mr.getTotalUsers() + "\n"); + // System.out.println("Total products: " + mr.getTotalProducts() + "\n"); + // mr.getRecommendationsForUser("A141HP4LYPWMSR"); + // } + + class User { + public long index; + public String id; + } + + class Product { + public long index; + public String id; + } + +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..09b06f7 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -3,6 +3,8 @@ import org.apache.mahout.cf.taste.common.TasteException; import org.junit.Test; +import nearsoft.academy.MovieRecommender; + import java.io.IOException; import java.util.List; @@ -13,9 +15,10 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from - // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + + String ROOT_PATH = System.getProperty("user.dir"); + + MovieRecommender recommender = new MovieRecommender(ROOT_PATH + "/src/main/resources/movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); @@ -26,5 +29,4 @@ public void testDataInfo() throws IOException, TasteException { assertThat(recommendations, hasItem("B000063W82")); } - } From abde2561b92ba36f66049f07c5f4d2bc56da6b17 Mon Sep 17 00:00:00 2001 From: Edwin Fajardo Date: Thu, 21 Oct 2021 13:31:42 -0500 Subject: [PATCH 2/4] Removed unused variables, imports and commented code --- .../nearsoft/academy/MovieRecommender.java | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java index 0c2db02..a8d3f9e 100644 --- a/src/main/java/nearsoft/academy/MovieRecommender.java +++ b/src/main/java/nearsoft/academy/MovieRecommender.java @@ -28,8 +28,6 @@ public class MovieRecommender { private static String ROOT_PATH = System.getProperty("user.dir"); private static String CSV_DATA_PATH = ROOT_PATH + "/src/main/resources/data.csv"; - // private static String USER_INDEX = ROOT_PATH + "/src/main/resources/users.csv"; - // private static String PRODUCT_INDEX = ROOT_PATH + "/src/main/resources/products.csv"; private String dataFilePath; private Map products = new HashMap(); @@ -39,29 +37,22 @@ public class MovieRecommender { private int totalProducts = 0; private int totalReviews = 0; - // private ArrayList users = new ArrayList(); - public MovieRecommender(String dataFilePath) { this.dataFilePath = dataFilePath; File processedData = new File(CSV_DATA_PATH); - // if (!processedData.exists()) { try { if (processedData.exists()) { processedData.delete(); } processFile(); } catch (IOException e) { e.printStackTrace(); } - // } else { - // analizeData(); - // } } private void processFile () throws IOException { System.out.println("Processing data...\n"); - long start1 = System.nanoTime(); FileInputStream file = new FileInputStream(this.dataFilePath); GZIPInputStream gzip = new GZIPInputStream(file); @@ -102,26 +93,8 @@ private void processFile () throws IOException { } fileWriter.close(); - long end1 = System.nanoTime(); - System.out.println("Elapsed Time in seconds: " + ((end1 - start1) * 0.000000001)); } - // private void analizeData() { - // System.out.println("Analyzing data...\n"); - // try { - // FileReader fileReader = new FileReader(new File(CSV_DATA_PATH)); - // BufferedReader br = new BufferedReader(fileReader); - // String line = ""; - - // while ((line = br.readLine()) != null) { - // this.setOccurrences(line); - // } - - // } catch (IOException e) { - // e.printStackTrace(); - // } - // } - private void setOccurrences (String productId, String userId) { // Count reviews this.totalReviews++; @@ -166,12 +139,8 @@ public List getRecommendationsForUser (String userID) throws TasteExcept long user = users.get(userID); - System.out.println("User: " + userID + ": " + user); - List recommendationss = recommender.recommend(user, 3); - System.out.println("Number of recomendations: " + recommendations.size()); - for (RecommendedItem recommendation : recommendationss) { int idOfProduct = (int) recommendation.getItemID(); recommendations.add(productsReverse.get(idOfProduct)); @@ -184,17 +153,6 @@ public List getRecommendationsForUser (String userID) throws TasteExcept return recommendations; } - // public static void main(String[] args) throws IOException, TasteException { - // String ROOT_PATH = System.getProperty("user.dir"); - - // MovieRecommender mr = new MovieRecommender(ROOT_PATH + "/src/main/resources/movies.txt.gz"); - - // System.out.println("Total reviews: " + mr.getTotalReviews() + "\n"); - // System.out.println("Total users: " + mr.getTotalUsers() + "\n"); - // System.out.println("Total products: " + mr.getTotalProducts() + "\n"); - // mr.getRecommendationsForUser("A141HP4LYPWMSR"); - // } - class User { public long index; public String id; From 1e4e77430093642468963cb6a12b55f3196b338c Mon Sep 17 00:00:00 2001 From: Edwin Fajardo Date: Mon, 25 Oct 2021 00:17:27 -0500 Subject: [PATCH 3/4] Removed unused sub classes --- .../nearsoft/academy/MovieRecommender.java | 54 +++++++------------ 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java index a8d3f9e..af4df6b 100644 --- a/src/main/java/nearsoft/academy/MovieRecommender.java +++ b/src/main/java/nearsoft/academy/MovieRecommender.java @@ -37,7 +37,12 @@ public class MovieRecommender { private int totalProducts = 0; private int totalReviews = 0; - public MovieRecommender(String dataFilePath) { + private DataModel model; + private UserSimilarity similarity; + private UserNeighborhood neighborhood; + private UserBasedRecommender recommender; + + public MovieRecommender(String dataFilePath) throws IOException, TasteException { this.dataFilePath = dataFilePath; File processedData = new File(CSV_DATA_PATH); @@ -49,16 +54,18 @@ public MovieRecommender(String dataFilePath) { e.printStackTrace(); } + this.model = new FileDataModel(new File(CSV_DATA_PATH)); + this.similarity = new PearsonCorrelationSimilarity(this.model); + this.neighborhood = new ThresholdUserNeighborhood(0.1, this.similarity, this.model); + this.recommender = new GenericUserBasedRecommender(this.model, this.neighborhood, this.similarity); } private void processFile () throws IOException { - System.out.println("Processing data...\n"); - FileInputStream file = new FileInputStream(this.dataFilePath); GZIPInputStream gzip = new GZIPInputStream(file); InputStreamReader isr = new InputStreamReader(gzip); BufferedReader br = new BufferedReader(isr); - + List dataParts = new ArrayList(); String line; @@ -85,7 +92,7 @@ private void processFile () throws IOException { String mixedData = userIdxVal + "," + productIdxVal + "," + score + "\n"; fileWriter.write(mixedData); - + this.setOccurrences(productId, userId); dataParts = new ArrayList(); @@ -93,6 +100,7 @@ private void processFile () throws IOException { } fileWriter.close(); + br.close(); } private void setOccurrences (String productId, String userId) { @@ -128,39 +136,15 @@ public int getTotalUsers () { public List getRecommendationsForUser (String userID) throws TasteException { List recommendations = new ArrayList(); - try { - DataModel model = new FileDataModel(new File(CSV_DATA_PATH)); - - UserSimilarity similarity = new PearsonCorrelationSimilarity(model); - - UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); - - UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); - - long user = users.get(userID); + long userIdx = users.get(userID); - List recommendationss = recommender.recommend(user, 3); + List recommendationss = recommender.recommend(userIdx, 3); - for (RecommendedItem recommendation : recommendationss) { - int idOfProduct = (int) recommendation.getItemID(); - recommendations.add(productsReverse.get(idOfProduct)); - } - - } catch (IOException e) { - e.printStackTrace(); + for (RecommendedItem recommendation : recommendationss) { + int idOfProduct = (int) recommendation.getItemID(); + recommendations.add(productsReverse.get(idOfProduct)); } - - return recommendations; - } - - class User { - public long index; - public String id; - } - class Product { - public long index; - public String id; + return recommendations; } - } From 4e6916cb6bbf7c6314415c0eda3be5c12dd5ad61 Mon Sep 17 00:00:00 2001 From: Edwin Fajardo Date: Mon, 25 Oct 2021 01:39:15 -0500 Subject: [PATCH 4/4] Using stream and map instead of foreach --- pom.xml | 4 +-- .../nearsoft/academy/MovieRecommender.java | 27 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index 9b4ec34..70f4f27 100644 --- a/pom.xml +++ b/pom.xml @@ -12,8 +12,8 @@ UTF-8 - 1.8 - 1.8 + 11 + 11 diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java index af4df6b..9f61efc 100644 --- a/src/main/java/nearsoft/academy/MovieRecommender.java +++ b/src/main/java/nearsoft/academy/MovieRecommender.java @@ -11,8 +11,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; +import org.apache.commons.collections.BidiMap; +import org.apache.commons.collections.bidimap.DualHashBidiMap; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; @@ -20,7 +23,6 @@ import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; -import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; import org.apache.mahout.cf.taste.similarity.UserSimilarity; @@ -30,9 +32,10 @@ public class MovieRecommender { private static String CSV_DATA_PATH = ROOT_PATH + "/src/main/resources/data.csv"; private String dataFilePath; - private Map products = new HashMap(); - private Map productsReverse = new HashMap(); - private Map users = new HashMap(); + + private BidiMap products = new DualHashBidiMap(); + private Map users = new HashMap(); + private int totalUsers = 0; private int totalProducts = 0; private int totalReviews = 0; @@ -87,7 +90,7 @@ private void processFile () throws IOException { String productId = dataParts.get(0); String score = dataParts.get(2); - int productIdxVal = (this.products.containsKey(productId) ? this.products.get(productId) : totalProducts); + int productIdxVal = (this.products.containsKey(productId) ? (int) this.products.get(productId) : totalProducts); int userIdxVal = (this.users.containsKey(userId) ? this.users.get(userId) : totalUsers); String mixedData = userIdxVal + "," + productIdxVal + "," + score + "\n"; @@ -110,7 +113,6 @@ private void setOccurrences (String productId, String userId) { // Count products if (!this.products.containsKey(productId)) { this.products.put(productId, totalProducts); - this.productsReverse.put(totalProducts, productId); this.totalProducts++; } @@ -136,14 +138,11 @@ public int getTotalUsers () { public List getRecommendationsForUser (String userID) throws TasteException { List recommendations = new ArrayList(); - long userIdx = users.get(userID); - - List recommendationss = recommender.recommend(userIdx, 3); - - for (RecommendedItem recommendation : recommendationss) { - int idOfProduct = (int) recommendation.getItemID(); - recommendations.add(productsReverse.get(idOfProduct)); - } + recommendations = recommender.recommend(users.get(userID), 3) + .stream() + .map(item -> (String) products.getKey((int) item.getItemID())) + .collect(Collectors.toList() + ); return recommendations; }