Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,23 @@

<name>big-data</name>
<url>http://maven.apache.org</url>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.mahout</groupId>
Expand Down
Binary file added src/data/movies.txt.gz
Binary file not shown.
125 changes: 125 additions & 0 deletions src/main/java/movierec/MovieRecommender.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// This class reads the gzip-compressed review file src/data/movies.txt.gz as input.
// The copy of src/data/movies.txt.gz stored in the repository is a small placeholder
// (the real file is too large to commit), so for the test to pass it must be replaced
// with the original 3+ GB file.
// Given the original file, this class writes an intermediate CSV file of cleaned data
// that is about 150 MB in size.

package movierec;

import java.io.*;
import java.util.zip.GZIPInputStream;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Builds a user-based movie recommender from a gzip-compressed review dump.
 *
 * <p>The input is scanned line by line for the {@code product/productId: },
 * {@code review/userId: } and {@code review/score: } fields. Every alphanumeric
 * user and product ID is mapped to a sequential integer (starting at 1) and one
 * {@code userId,productId,score} row per review is written to an intermediate
 * CSV file, which is then used as the input of Mahout's {@link FileDataModel}.
 *
 * <p>This class is not designed for concurrent use.
 */
public class MovieRecommender {
    /** Marker that starts a product ID line in the input file. */
    private static final String PRODUCT_PREFIX = "product/productId: ";
    /** Marker that starts a user ID line in the input file. */
    private static final String USER_PREFIX = "review/userId: ";
    /** Marker that starts a score line in the input file. */
    private static final String SCORE_PREFIX = "review/score: ";

    int totalProducts;
    int totalUsers;
    int totalReviews;
    // keep hash tables to be able to 'translate' between numeric and alphanumeric IDs
    Hashtable<String,Integer> products;
    Hashtable<String,Integer> users;
    Hashtable<Integer,String> productsByID;
    String myPath;
    String output;

    /**
     * Creates the recommender and immediately converts the input file into the
     * intermediate CSV file ({@code src/data/nice_data.csv}).
     *
     * @param pathTidyFile path of the gzip-compressed review file to read
     * @throws Exception if the input cannot be read or the CSV cannot be written
     */
    public MovieRecommender(String pathTidyFile) throws Exception {
        this.totalProducts = 0;
        this.totalUsers = 0;
        this.totalReviews = 0;
        this.products = new Hashtable<String,Integer>();
        this.users = new Hashtable<String,Integer>();
        this.productsByID = new Hashtable<Integer,String>();
        this.myPath = pathTidyFile;
        this.output = "src/data/nice_data.csv";
        dataWrangling();
    }

    /**
     * Cleans and re-arranges the input data so that it is a valid input for the
     * Mahout {@link FileDataModel} class, and recomputes the review, user and
     * product totals.
     *
     * <p>All counters and ID tables are reset first, so calling this method again
     * rebuilds the state from scratch instead of accumulating on top of a previous run.
     *
     * @throws IOException if the input cannot be read or the CSV cannot be written
     */
    public void dataWrangling() throws IOException {
        this.totalProducts = 0;
        this.totalUsers = 0;
        this.totalReviews = 0;
        this.products.clear();
        this.users.clear();
        this.productsByID.clear();

        String user = "";
        String product = "";
        String score = "";

        // try-with-resources closes every stream even when reading or writing fails.
        // The raw file stream is its own resource so it is released even if the
        // GZIP header cannot be read.
        try (InputStream rawStream = new FileInputStream(this.myPath);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new GZIPInputStream(rawStream)));
             Writer csvWriter = new BufferedWriter(new FileWriter(this.output))) {

            String line;
            while ((line = reader.readLine()) != null) {
                // Match the markers only at the start of the line: free review text
                // that merely mentions a marker must not be parsed as an ID or score.
                if (line.startsWith(USER_PREFIX)) {
                    user = valueAfter(line, USER_PREFIX);
                    if (!user.isEmpty() && !this.users.containsKey(user)) {
                        this.totalUsers++;
                        this.users.put(user, this.totalUsers);
                    }
                } else if (line.startsWith(PRODUCT_PREFIX)) {
                    product = valueAfter(line, PRODUCT_PREFIX);
                    if (!product.isEmpty() && !this.products.containsKey(product)) {
                        this.totalProducts++;
                        this.products.put(product, this.totalProducts);
                        this.productsByID.put(this.totalProducts, product);
                    }
                } else if (line.startsWith(SCORE_PREFIX)) {
                    score = valueAfter(line, SCORE_PREFIX);
                    this.totalReviews++;
                }

                // Once all three fields of a review are known, emit one CSV row and
                // start collecting the next review. Compare by content, not identity.
                if (!user.isEmpty() && !product.isEmpty() && !score.isEmpty()) {
                    csvWriter.write(this.users.get(user) + "," + this.products.get(product)
                            + "," + score + "\n");
                    user = "";
                    product = "";
                    score = "";
                }
            }
        }
    }

    /**
     * Returns the trimmed text that follows {@code prefix} in {@code line}.
     * The result is empty when the line carries no value after the marker.
     */
    private static String valueAfter(String line, String prefix) {
        return line.substring(prefix.length()).trim();
    }

    /**
     * Gets up to 3 item recommendations for the given user.
     *
     * @param userID alphanumeric user ID as it appears in the input file
     * @return the alphanumeric product IDs recommended for the user; an empty list
     *         when the user is {@code null} or was not present in the input file
     * @throws IOException if the intermediate CSV file cannot be read
     * @throws TasteException if the recommender fails to compute recommendations
     */
    public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException {
        List<String> recommendations = new ArrayList<String>();

        // An unknown user has no numeric ID; unboxing the missing entry would throw.
        Integer numericUserId = (userID == null) ? null : this.users.get(userID);
        if (numericUserId == null) {
            return recommendations;
        }

        DataModel model = new FileDataModel(new File(this.output));
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
        UserBasedRecommender recommender =
                new GenericUserBasedRecommender(model, neighborhood, similarity);

        for (RecommendedItem recommendation : recommender.recommend(numericUserId, 3)) {
            // Item IDs in the model are the int IDs assigned in dataWrangling(),
            // so narrowing the long back to int is lossless.
            recommendations.add(this.productsByID.get((int) recommendation.getItemID()));
        }
        return recommendations;
    }

    /** @return number of {@code review/score: } lines found in the input file */
    public int getTotalReviews() {
        return totalReviews;
    }

    /** @return number of distinct product IDs found in the input file */
    public int getTotalProducts() {
        return totalProducts;
    }

    /** @return number of distinct user IDs found in the input file */
    public int getTotalUsers() {
        return totalUsers;
    }
}

Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
// PLEASE NOTE:
// for this test to pass, the placeholder src/data/movies.txt.gz file
// must be replaced with the original 3+ GB file.
package nearsoft.academy.bigdata.recommendation;

import org.apache.mahout.cf.taste.common.TasteException;
import movierec.MovieRecommender;
import org.junit.Test;

import java.io.IOException;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.hasItem;


// Test for MovieRecommender: checks the review, product and user totals computed from
// the input file, and that the recommendations for one known user contain three
// expected product IDs.
// NOTE(review): this listing appears to interleave removed and added diff lines
// (two method signatures, two 'recommender' declarations, extra closing braces) —
// confirm against the actual file before relying on it.
public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
public void testDataInfo() throws Exception {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());

MovieRecommender recommender = new MovieRecommender("src/data/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews()); // attributes of the main class
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());

// the recommendations are returned as alphanumeric product IDs
List<String> recommendations = recommender.getRecommendationsForUser("A141HP4LYPWMSR");
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));

}

}
}