-
Notifications
You must be signed in to change notification settings - Fork 27
Project: ramblebot #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,6 @@ | ||
|
|
||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.Scanner; | ||
|
|
||
|
|
@@ -28,9 +31,40 @@ public class LowercaseSentenceTokenizer implements Tokenizer { | |
| * @param scanner the Scanner to read the input text from | ||
| * @return a list of tokens, where each token is a word or a period | ||
| */ | ||
| public List<String> tokenize(Scanner scanner) { | ||
| // TODO: Implement this function to convert the scanner's input to a list of words and periods | ||
| return null; | ||
| } | ||
| /* TODO: Implement this function to convert the scanner's input to a list of words and periods | ||
| return null; */ | ||
|
|
||
|
|
||
| @Override | ||
| public List<String> tokenize(Scanner scanner) { | ||
| List<String> tokens = new ArrayList<>(); | ||
|
|
||
| String input = scanner.nextLine().toLowerCase(); | ||
|
|
||
| // Split on runs of whitespace ("\\s+") so repeated spaces between words do not produce empty tokens | ||
| String[] words = input.split("\\s+"); | ||
|
|
||
| for (String word : words) { | ||
| if (word.endsWith(".")) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Smart! |
||
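| // I used the substring method described on this website (a JavaScript reference; Java's String.substring(beginIndex, endIndex) takes the same arguments here): https://www.w3schools.com/jsref/jsref_substring.asp | ||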
| /* Explanation of what this code does: | ||
| This code reads one line of input, converts it to lowercase, and splits it into words. It then checks | ||
| whether each word ends with a period (.). If a word ends with a period, the period is | ||
| separated and stored as its own token. Any word without a trailing period is added to the list directly. As a result, | ||
| this function returns a list in which words and periods are separate tokens. | ||
| */ | ||
| String withoutPeriod = word.substring(0, word.length() - 1); | ||
| if (!withoutPeriod.isEmpty()) { | ||
| tokens.add(withoutPeriod); // Add the word without the period | ||
| } | ||
| tokens.add("."); // Add the period as a separate token | ||
| } else { | ||
| tokens.add(word); | ||
| } | ||
| } | ||
|
|
||
| return tokens; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,16 @@ void testTokenizeWithNoCapitalizationOrPeriod() { | |
| * Write your test here! | ||
| */ | ||
|
|
||
| @Test | ||
| void testTokenizeWithExtraSpaces() { | ||
| LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); | ||
| Scanner scanner = new Scanner("hello hi hi hi hello hello"); | ||
| List<String> tokens = tokenizer.tokenize(scanner); | ||
|
|
||
| assertEquals(List.of("hello", "hi", "hi", "hi", "hello", "hello"), tokens); | ||
| } | ||
|
Comment on lines
+23
to
+30
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice test! |
||
|
|
||
|
|
||
|
|
||
| // Wave 3 | ||
| @Test | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Random; | ||
| import java.util.Scanner; | ||
|
|
||
| /** | ||
|
|
@@ -10,8 +11,9 @@ | |
| * words that directly follow it in the text. | ||
| */ | ||
| public class UnigramWordPredictor implements WordPredictor { | ||
| private Map<String, List<String>> neighborMap; | ||
| private Map<String, List<String>> neighborMap; | ||
| private Tokenizer tokenizer; | ||
| private Random random; | ||
|
|
||
| /** | ||
| * Constructs a UnigramWordPredictor with the specified tokenizer. | ||
|
|
@@ -20,6 +22,8 @@ public class UnigramWordPredictor implements WordPredictor { | |
| */ | ||
| public UnigramWordPredictor(Tokenizer tokenizer) { | ||
| this.tokenizer = tokenizer; | ||
| this.neighborMap = new HashMap<>(); | ||
| this.random = new Random(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -29,29 +33,27 @@ public UnigramWordPredictor(Tokenizer tokenizer) { | |
| * in the text. The resultant map is stored in the neighborMap | ||
| * instance variable. | ||
| * | ||
| * For example: | ||
| * If the input text is: "The cat sat. The cat slept. The dog barked." | ||
| * After tokenizing, the tokens would be: ["the", "cat", "sat", ".", "the", "cat", "slept", ".", "the", "dog", "barked", "."] | ||
| * | ||
| * The resulting map (neighborMap) would be: | ||
| * { | ||
| * "the" -> ["cat", "cat", "dog"], | ||
| * "cat" -> ["sat", "slept"], | ||
| * "sat" -> ["."], | ||
| * "." -> ["the", "the"], | ||
| * "slept" -> ["."], | ||
| * "dog" -> ["barked"], | ||
| * "barked" -> ["."] | ||
| * } | ||
| * | ||
| * The order of the map and the order of each list is not important. | ||
| * | ||
| * @param scanner the Scanner to read the training text from | ||
| */ | ||
| public void train(Scanner scanner) { | ||
| List<String> trainingWords = tokenizer.tokenize(scanner); | ||
|
|
||
| // TODO: Convert the trainingWords into neighborMap here | ||
| if (trainingWords.isEmpty()) { | ||
| return; | ||
| } | ||
|
|
||
| for (int i = 0; i < trainingWords.size() - 1; i++) { | ||
| String currentWord = trainingWords.get(i); | ||
| String nextWord = trainingWords.get(i + 1); | ||
|
|
||
| // If currentWord is not already in neighborMap, add it with an empty list as its value. | ||
|
|
||
|
|
||
| neighborMap.putIfAbsent(currentWord, new ArrayList<>()); | ||
|
|
||
| // Store the word that follows | ||
| neighborMap.get(currentWord).add(nextWord); | ||
| } | ||
|
Comment on lines
+45
to
+56
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice! |
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -98,18 +100,37 @@ public void train(Scanner scanner) { | |
| * @param context a list of words representing the current context | ||
| * @return the predicted next word, or null if no prediction can be made | ||
| */ | ||
|
|
||
| /* Explanation of what this code does: the predictNextWord method takes a list of | ||
| words as context and predicts the next word. It first checks if the context is empty and | ||
| returns null if so. Then, it retrieves the last word from the list and looks it up in neighborMap, | ||
| which stores words and their possible next words from training data. If the last word exists in the map | ||
| and has a non-empty list of next words, it picks one entry at random; a word that followed more often appears | ||
| more times in the list, so it is more likely to be chosen. If no match is found, it returns | ||
| null. | ||
| */ | ||
| /* I used this link to learn about this: https://stackoverflow.com/questions/4672806/java-simplest-way-to-get-last-word-in-a-string */ | ||
| public String predictNextWord(List<String> context) { | ||
| // TODO: Return a predicted word given the words preceding it | ||
| // Hint: only the last word in context should be looked at | ||
| return null; | ||
| if (context.isEmpty()) { | ||
| return null; | ||
| } | ||
|
|
||
| String lastWord = context.get(context.size() - 1); // Get last word | ||
|
|
||
| if (neighborMap.containsKey(lastWord)) { | ||
| List<String> nextWords = neighborMap.get(lastWord); | ||
|
|
||
| if (!nextWords.isEmpty()) { | ||
| return nextWords.get(random.nextInt(nextWords.size())); | ||
| } | ||
| } | ||
|
Comment on lines
+117
to
+125
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice logic |
||
|
|
||
| return null; | ||
| } | ||
|
|
||
| /** | ||
| * Returns a copy of the neighbor map. The neighbor map is a mapping | ||
| * from each word to a list of words that have followed it in the training data. | ||
| * | ||
| * You do not need to modify this method for your project. | ||
| * | ||
| * @return a copy of the neighbor map | ||
| */ | ||
| public Map<String, List<String>> getNeighborMap() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this only reads in a single line of input and doesn't handle multi-line input.