From cdc15b4d703e1c7b614c904771a13bccf834f76a Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Tue, 21 Jan 2025 11:44:49 -0800 Subject: [PATCH 1/9] Added comment in README to ensure edits are working --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index e57375e..529d3d4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # ramblebot + +//EDIT BY EMILY M!! EDITS ARE WORKING!! + + A project to exercise Java, JUnit, git, GitHub, and code-reading skills. Students will create a language model to generate text. ## Expectations From b1d2a877a36e986ad0552f6205ba0d829f4bf7ae Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Tue, 21 Jan 2025 11:51:32 -0800 Subject: [PATCH 2/9] Changed text in README that had a previous filename --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 529d3d4..4f37a20 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ This is a large, difficult project. Start early, and get help when you need it. Sometimes this button takes a little bit to show up when you first open VS Code. If you're not seeing it, make sure you have the Java extension pack installed and it is active. 1. It should ask you for a filename. Give it the following filename: ``` - wikipediaData.txt + keatsTraining.txt ``` Then hit enter. 1. It should ask you for a number of words. Enter a positive integer and hit enter. From bc1be4c7a1c4f971c55fce044828fe3b21d5f2b1 Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 03:09:21 -0800 Subject: [PATCH 3/9] Finsihe dWave 1 --- src/LowercaseSentenceTokenizer.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java index cc8285d..01f89a0 100644 --- a/src/LowercaseSentenceTokenizer.java +++ b/src/LowercaseSentenceTokenizer.java @@ -1,3 +1,4 @@ +import java.util.ArrayList; import java.util.List; import java.util.Scanner; @@ -28,9 +29,14 @@ public class LowercaseSentenceTokenizer implements Tokenizer { * @param scanner the Scanner to read the input text from * @return a list of tokens, where each token is a word or a period */ - public List tokenize(Scanner scanner) { - // TODO: Implement this function to convert the scanner's input to a list of words and periods - return null; - } +public List tokenize(Scanner scanner) { + + List tokens = new ArrayList<>(); //makes an arrayList for the tokens to be placed into + while(scanner.hasNext()){ + tokens.add(scanner.next().toLowerCase()); + } + return tokens; //returns a list of tokes +} + } From e61773985d4962069c3e41fb4498eff44a505c48 Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 03:21:36 -0800 Subject: [PATCH 4/9] Finished wave 2, fixed a typo in wave 1 --- src/LowercaseSentenceTokenizerTest.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/LowercaseSentenceTokenizerTest.java b/src/LowercaseSentenceTokenizerTest.java index 85ac3a2..06dddcc 100644 --- a/src/LowercaseSentenceTokenizerTest.java +++ b/src/LowercaseSentenceTokenizerTest.java @@ -16,9 +16,18 @@ void testTokenizeWithNoCapitalizationOrPeriod() { } // Wave 2 - /* - * Write your test here! - */ + @Test + void testTokenizeSentenceWithSpaces(){ + + LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); + Scanner scanner = new Scanner("this is a lowercase sentence with many spaces"); + List tokens = tokenizer.tokenize(scanner); + + assertEquals(List.of("this", "is", "a", "lowercase", "sentence", "with", "many", "spaces"), tokens); + + } + + // Wave 3 From 43268512ec515cf1478a4b70373316d35cbccd89 Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 04:11:02 -0800 Subject: [PATCH 5/9] Finished wave 3, all tests working --- src/LowercaseSentenceTokenizer.java | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java index 01f89a0..4d0213c 100644 --- a/src/LowercaseSentenceTokenizer.java +++ b/src/LowercaseSentenceTokenizer.java @@ -29,14 +29,29 @@ public class LowercaseSentenceTokenizer implements Tokenizer { * @param scanner the Scanner to read the input text from * @return a list of tokens, where each token is a word or a period */ -public List tokenize(Scanner scanner) { + + @Override //forgot to add an override before oopsie + public List tokenize(Scanner scanner) { List tokens = new ArrayList<>(); //makes an arrayList for the tokens to be placed into while(scanner.hasNext()){ - tokens.add(scanner.next().toLowerCase()); - } - return tokens; //returns a list of tokes -} + String rambleWord = scanner.next().toLowerCase(); + int length = getLength(rambleWord); + + if (length > 0 && rambleWord.charAt(length-1) == '.' && length > 1){ + tokens.add(rambleWord.substring(0,length-1)); //adds word, removes end period + tokens.add("."); //readds period as a different token + } + else { + tokens.add(rambleWord); + } + } + return tokens; + } + +public int getLength(String string){ + return string.length(); + } -} +} //end LowerCaseTokenizer.java From 4c83ab877f2657fba14eeb4a1ba321cd6ce84ede Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 05:26:43 -0800 Subject: [PATCH 6/9] Finished wave 4, 1/3 tests working --- src/UnigramWordPredictor.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java index d713250..0466c75 100644 --- a/src/UnigramWordPredictor.java +++ b/src/UnigramWordPredictor.java @@ -48,11 +48,21 @@ public UnigramWordPredictor(Tokenizer tokenizer) { * * @param scanner the Scanner to read the training text from */ +@Override //added override public void train(Scanner scanner) { List trainingWords = tokenizer.tokenize(scanner); - // TODO: Convert the trainingWords into neighborMap here - } + neighborMap = new HashMap<>(); //makes a map for all the strings and lists + for(int i = 0; i < trainingWords.size() - 1; i++){ + String currentWord = trainingWords.get(i); + String nextWord = trainingWords.get(i +1); + + if (!neighborMap.containsKey(currentWord)){ + neighborMap.put(currentWord, new ArrayList<>()); + }//end if + neighborMap.get(currentWord).add(nextWord); + }//end for + }//end scanner /** * Predicts the next word based on the given context. From 8dd6fec7d521c09266b02376eb82249a3ad1fc18 Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 05:46:17 -0800 Subject: [PATCH 7/9] Finished wave 5, all tests working --- src/UnigramWordPredictor.java | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java index 0466c75..0029792 100644 --- a/src/UnigramWordPredictor.java +++ b/src/UnigramWordPredictor.java @@ -2,6 +2,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Scanner; /** @@ -108,11 +109,23 @@ public void train(Scanner scanner) { * @param context a list of words representing the current context * @return the predicted next word, or null if no prediction can be made */ + @Override //added override public String predictNextWord(List context) { - // TODO: Return a predicted word given the words preceding it - // Hint: only the last word in context should be looked at - return null; + + String lastWord = context.get(context.size() -1); + List nextWord = neighborMap.get(lastWord); + + if (nextWord == null || nextWord.isEmpty()){ + return null; + } + + Random rand = new Random(); //didn't copy and paste anything, but I read up on how to use Random with this site: https://www.geeksforgeeks.org/generating-random-numbers-in-java/ + int randomRamble = rand.nextInt(nextWord.size()); + + return nextWord.get(randomRamble); + } + /** * Returns a copy of the neighbor map. The neighbor map is a mapping From d3492e314835985bd07b7fa940c3c125f892fd8b Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 06:05:31 -0800 Subject: [PATCH 8/9] Finished wave 6, added funnyCourtTranscripts.txt + ramblebotOutput.txt --- funnyCourtTranscripts.txt | 54 +++++++++++++++++++++++++++++++++++ ramblebotOutput.txt | 17 +++++++++++ src/UnigramWordPredictor.java | 2 +- 3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 funnyCourtTranscripts.txt create mode 100644 ramblebotOutput.txt diff --git a/funnyCourtTranscripts.txt b/funnyCourtTranscripts.txt new file mode 100644 index 0000000..a163c8f --- /dev/null +++ b/funnyCourtTranscripts.txt @@ -0,0 +1,54 @@ +ATTORNEY: What was the first thing your husband said to you that morning? +WITNESS: He said, Where am I, Cathy? +ATTORNEY: And why did that upset you? +WITNESS: My name is Susan! + +ATTORNEY: What gear were you in at the moment of the impact? +WITNESS: Gucci sweats and Reeboks. + +ATTORNEY: Are you sexually active? +WITNESS: No, I just lie there. + +ATTORNEY: What is your date of birth? +WITNESS: July 18th. +ATTORNEY: What year? +WITNESS: Every year. + +ATTORNEY: How old is your son, the one living with you? +WITNESS: Thirty-eight or thirty-five, I can't remember which. +ATTORNEY: How long has he lived with you? +WITNESS: Forty-five years. + +ATTORNEY: This myasthenia gravis, does it affect your memory at all? +WITNESS: Yes. +ATTORNEY: And in what ways does it affect your memory? +WITNESS: I forget. +ATTORNEY: You forget? Can you give us an example of something you forgot? + +ATTORNEY: Now doctor, isn't it true that when a person dies in his sleep, he doesn't know about it until the next morning? +WITNESS: Did you actually pass the bar exam? + +ATTORNEY: The youngest son, the 20-year-old, how old is he? +WITNESS: He's 20, much like your IQ. + +ATTORNEY: Were you present when your picture was taken? +WITNESS: Are you shitting me? + +ATTORNEY: So the date of conception (of the baby) was August 8th? +WITNESS: Yes. +ATTORNEY: And what were you doing at that time? +WITNESS: Getting laid. + +ATTORNEY: How was your first marriage terminated? +WITNESS: By death. +ATTORNEY: And by whose death was it terminated? +WITNESS: Take a guess. + +ATTORNEY: Is your appearance here this morning pursuant to a deposition notice which I sent to your attorney? +WITNESS: No, this is how I dress when I go to work. + +ATTORNEY: Doctor, how many of your autopsies have you performed on dead people? +WITNESS: All of them. The live ones put up too much of a fight. + +ATTORNEY: ALL your responses MUST be oral, OK? What school did you go to? +WITNESS: Oral. \ No newline at end of file diff --git a/ramblebotOutput.txt b/ramblebotOutput.txt new file mode 100644 index 0000000..2721b2c --- /dev/null +++ b/ramblebotOutput.txt @@ -0,0 +1,17 @@ +attorney: what school did you present when a fight . attorney: what was your responses must be oral, ok? what school did you performed on dead people? witness: july 18th . attorney: doctor, how i dress when a fight . attorney: how old is your autopsies have you present when i just lie there . attorney: and what gear were you sexually active? witness: july 18th . attorney: and reeboks . attorney: what ways does it affect your iq . attorney: and what gear were you performed on dead people? witness: he's 20, much like your picture was it terminated? +//not sure why there is a little space before the period :C + +//Output spread out for reading convenience: + +attorney: what school did you present when a fight . +attorney: what was your responses must be oral, ok? what school did you performed on dead people? + +witness: july 18th . attorney: doctor, how i dress when a fight . + +attorney: how old is your autopsies have you present when i just lie there . +attorney: and what gear were you sexually active? witness: july 18th . +attorney: and reeboks . +attorney: what ways does it affect your iq . +attorney: and what gear were you performed on dead people? + +witness: he's 20, much like your picture was it terminated? \ No newline at end of file diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java index 0029792..635eff2 100644 --- a/src/UnigramWordPredictor.java +++ b/src/UnigramWordPredictor.java @@ -115,7 +115,7 @@ public String predictNextWord(List context) { String lastWord = context.get(context.size() -1); List nextWord = neighborMap.get(lastWord); - if (nextWord == null || nextWord.isEmpty()){ + if (nextWord == null || nextWord.isEmpty()){ //if nextWord is null or empty return null return null; } From 7ceb6c80d724646ea9c731b54b49e6d65504b9bd Mon Sep 17 00:00:00 2001 From: EmilyM <155667566+EmilyMenken@user.noreply.github.com> Date: Thu, 30 Jan 2025 06:17:07 -0800 Subject: [PATCH 9/9] final touches --- funnyCourtTranscripts.txt | 2 ++ ramblebotOutput.txt | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/funnyCourtTranscripts.txt b/funnyCourtTranscripts.txt index a163c8f..9619a05 100644 --- a/funnyCourtTranscripts.txt +++ b/funnyCourtTranscripts.txt @@ -1,3 +1,5 @@ +From a book called: Disorder in the American Courts by Marcelle Boren + ATTORNEY: What was the first thing your husband said to you that morning? WITNESS: He said, Where am I, Cathy? ATTORNEY: And why did that upset you? diff --git a/ramblebotOutput.txt b/ramblebotOutput.txt index 2721b2c..97442c2 100644 --- a/ramblebotOutput.txt +++ b/ramblebotOutput.txt @@ -6,10 +6,14 @@ attorney: what school did you present when a fight . attorney: what was your res attorney: what school did you present when a fight . attorney: what was your responses must be oral, ok? what school did you performed on dead people? -witness: july 18th . attorney: doctor, how i dress when a fight . +witness: july 18th . +attorney: doctor, how i dress when a fight . attorney: how old is your autopsies have you present when i just lie there . -attorney: and what gear were you sexually active? witness: july 18th . +attorney: and what gear were you sexually active? + +witness: july 18th . + attorney: and reeboks . attorney: what ways does it affect your iq . attorney: and what gear were you performed on dead people?