diff --git a/README.md b/README.md index e57375e..4f37a20 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # ramblebot + +//EDIT BY EMILY M!! EDITS ARE WORKING!! + + A project to exercise Java, JUnit, git, GitHub, and code-reading skills. Students will create a language model to generate text. ## Expectations @@ -43,7 +47,7 @@ This is a large, difficult project. Start early, and get help when you need it. Sometimes this button takes a little bit to show up when you first open VS Code. If you're not seeing it, make sure you have the Java extension pack installed and it is active. 1. It should ask you for a filename. Give it the following filename: ``` - wikipediaData.txt + keatsTraining.txt ``` Then hit enter. 1. It should ask you for a number of words. Enter a positive integer and hit enter. diff --git a/funnyCourtTranscripts.txt b/funnyCourtTranscripts.txt new file mode 100644 index 0000000..9619a05 --- /dev/null +++ b/funnyCourtTranscripts.txt @@ -0,0 +1,56 @@ +From a book called: Disorder in the American Courts by Marcelle Boren + +ATTORNEY: What was the first thing your husband said to you that morning? +WITNESS: He said, Where am I, Cathy? +ATTORNEY: And why did that upset you? +WITNESS: My name is Susan! + +ATTORNEY: What gear were you in at the moment of the impact? +WITNESS: Gucci sweats and Reeboks. + +ATTORNEY: Are you sexually active? +WITNESS: No, I just lie there. + +ATTORNEY: What is your date of birth? +WITNESS: July 18th. +ATTORNEY: What year? +WITNESS: Every year. + +ATTORNEY: How old is your son, the one living with you? +WITNESS: Thirty-eight or thirty-five, I can't remember which. +ATTORNEY: How long has he lived with you? +WITNESS: Forty-five years. + +ATTORNEY: This myasthenia gravis, does it affect your memory at all? +WITNESS: Yes. +ATTORNEY: And in what ways does it affect your memory? +WITNESS: I forget. +ATTORNEY: You forget? Can you give us an example of something you forgot? + +ATTORNEY: Now doctor, isn't it true that when a person dies in his sleep, he doesn't know about it until the next morning? +WITNESS: Did you actually pass the bar exam? + +ATTORNEY: The youngest son, the 20-year-old, how old is he? +WITNESS: He's 20, much like your IQ. + +ATTORNEY: Were you present when your picture was taken? +WITNESS: Are you shitting me? + +ATTORNEY: So the date of conception (of the baby) was August 8th? +WITNESS: Yes. +ATTORNEY: And what were you doing at that time? +WITNESS: Getting laid. + +ATTORNEY: How was your first marriage terminated? +WITNESS: By death. +ATTORNEY: And by whose death was it terminated? +WITNESS: Take a guess. + +ATTORNEY: Is your appearance here this morning pursuant to a deposition notice which I sent to your attorney? +WITNESS: No, this is how I dress when I go to work. + +ATTORNEY: Doctor, how many of your autopsies have you performed on dead people? +WITNESS: All of them. The live ones put up too much of a fight. + +ATTORNEY: ALL your responses MUST be oral, OK? What school did you go to? +WITNESS: Oral. \ No newline at end of file diff --git a/ramblebotOutput.txt b/ramblebotOutput.txt new file mode 100644 index 0000000..97442c2 --- /dev/null +++ b/ramblebotOutput.txt @@ -0,0 +1,21 @@ +attorney: what school did you present when a fight . attorney: what was your responses must be oral, ok? what school did you performed on dead people? witness: july 18th . attorney: doctor, how i dress when a fight . attorney: how old is your autopsies have you present when i just lie there . attorney: and what gear were you sexually active? witness: july 18th . attorney: and reeboks . attorney: what ways does it affect your iq . attorney: and what gear were you performed on dead people? witness: he's 20, much like your picture was it terminated? +//not sure why there is a little space before the period :C + +//Output spread out for reading convenience: + +attorney: what school did you present when a fight . +attorney: what was your responses must be oral, ok? what school did you performed on dead people? + +witness: july 18th . + +attorney: doctor, how i dress when a fight . +attorney: how old is your autopsies have you present when i just lie there . +attorney: and what gear were you sexually active? + +witness: july 18th . + +attorney: and reeboks . +attorney: what ways does it affect your iq . +attorney: and what gear were you performed on dead people? + +witness: he's 20, much like your picture was it terminated? \ No newline at end of file diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java index cc8285d..4d0213c 100644 --- a/src/LowercaseSentenceTokenizer.java +++ b/src/LowercaseSentenceTokenizer.java @@ -1,3 +1,4 @@ +import java.util.ArrayList; import java.util.List; import java.util.Scanner; @@ -28,9 +29,29 @@ public class LowercaseSentenceTokenizer implements Tokenizer { * @param scanner the Scanner to read the input text from * @return a list of tokens, where each token is a word or a period */ - public List tokenize(Scanner scanner) { - // TODO: Implement this function to convert the scanner's input to a list of words and periods - return null; + + @Override //forgot to add an override before oopsie + public List tokenize(Scanner scanner) { + + List tokens = new ArrayList<>(); //makes an arrayList for the tokens to be placed into + while(scanner.hasNext()){ + String rambleWord = scanner.next().toLowerCase(); + int length = getLength(rambleWord); + + if (length > 0 && rambleWord.charAt(length-1) == '.' && length > 1){ + tokens.add(rambleWord.substring(0,length-1)); //adds word, removes end period + tokens.add("."); //readds period as a different token + } + else { + tokens.add(rambleWord); + } + } + return tokens; + } + +public int getLength(String string){ + return string.length(); } -} + +} //end LowerCaseTokenizer.java diff --git a/src/LowercaseSentenceTokenizerTest.java b/src/LowercaseSentenceTokenizerTest.java index 85ac3a2..06dddcc 100644 --- a/src/LowercaseSentenceTokenizerTest.java +++ b/src/LowercaseSentenceTokenizerTest.java @@ -16,9 +16,18 @@ void testTokenizeWithNoCapitalizationOrPeriod() { } // Wave 2 - /* - * Write your test here! - */ + @Test + void testTokenizeSentenceWithSpaces(){ + + LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); + Scanner scanner = new Scanner("this is a lowercase sentence with many spaces"); + List tokens = tokenizer.tokenize(scanner); + + assertEquals(List.of("this", "is", "a", "lowercase", "sentence", "with", "many", "spaces"), tokens); + + } + + // Wave 3 diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java index d713250..635eff2 100644 --- a/src/UnigramWordPredictor.java +++ b/src/UnigramWordPredictor.java @@ -2,6 +2,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Scanner; /** @@ -48,11 +49,21 @@ public UnigramWordPredictor(Tokenizer tokenizer) { * * @param scanner the Scanner to read the training text from */ +@Override //added override public void train(Scanner scanner) { List trainingWords = tokenizer.tokenize(scanner); - // TODO: Convert the trainingWords into neighborMap here - } + neighborMap = new HashMap<>(); //makes a map for all the strings and lists + for(int i = 0; i < trainingWords.size() - 1; i++){ + String currentWord = trainingWords.get(i); + String nextWord = trainingWords.get(i +1); + + if (!neighborMap.containsKey(currentWord)){ + neighborMap.put(currentWord, new ArrayList<>()); + }//end if + neighborMap.get(currentWord).add(nextWord); + }//end for + }//end scanner /** * Predicts the next word based on the given context. @@ -98,11 +109,23 @@ public void train(Scanner scanner) { * @param context a list of words representing the current context * @return the predicted next word, or null if no prediction can be made */ + @Override //added override public String predictNextWord(List context) { - // TODO: Return a predicted word given the words preceding it - // Hint: only the last word in context should be looked at - return null; + + String lastWord = context.get(context.size() -1); + List nextWord = neighborMap.get(lastWord); + + if (nextWord == null || nextWord.isEmpty()){ //if nextWord is null or empty return null + return null; + } + + Random rand = new Random(); //didn't copy and paste anything, but I read up on how to use Random with this site: https://www.geeksforgeeks.org/generating-random-numbers-in-java/ + int randomRamble = rand.nextInt(nextWord.size()); + + return nextWord.get(randomRamble); + } + /** * Returns a copy of the neighbor map. The neighbor map is a mapping