From eac202c9e25a3a311507c4f014d6a87c58b255c5 Mon Sep 17 00:00:00 2001 From: Aaron Chantrill Date: Mon, 1 May 2023 21:30:14 -0400 Subject: [PATCH] Fix consecutive stars I had a problem with an AIML element with two consecutive stars in it, one for a person's first name and the second for the last name. The reference to the second star was returning the last name correctly, but the reference to the first star kept coming back empty. It turned out that the parser was looking for a match between the next word in the pattern and the next word in the "that". When the next word in the pattern was a star, it would not match. Here is an example: ASK ME A QUESTION YES DO YOU LISTEN TO * * MUSIC NO DO YOU LISTEN TO * * MUSIC This fixes the issue by checking if the next word in a match is another star and, if so, then stopping after the first word. --- .gitignore | 4 ++++ aiml/PatternMgr.py | 55 +++++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 19ea5ef..8e8af3a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ python_aiml.egg-info *.brn *.brain + +.ipynb_checkpoints/ +Lesson*.xml +*.ipynb diff --git a/aiml/PatternMgr.py b/aiml/PatternMgr.py index 9cb4a45..bb405ff 100644 --- a/aiml/PatternMgr.py +++ b/aiml/PatternMgr.py @@ -14,6 +14,7 @@ from .constants import * + class PatternMgr: # special dictionary keys _UNDERSCORE = 0 @@ -199,20 +200,21 @@ def star(self, starType, pattern, that, topic, index): else: # unknown value raise ValueError( "starType must be in ['star', 'thatstar', 'topicstar']" ) - + # compare the input string to the matched pattern, word by word. # At the end of this loop, if foundTheRightStar is true, start and # end will contain the start and end indices (in "words") of # the substring that the desired star matched. 
foundTheRightStar = False start = end = j = numStars = k = 0 - for i in range(len(words)): + for i in range(len(words)): # i is the word index # This condition is true after processing a star # that ISN'T the one we're looking for. - if i < k: - continue + if i < k: # k is the current word within the star match, + # or the first word after the star match. + continue # until the word index reaches the end of the star. # If we're reached the end of the pattern, we're done. - if j == len(patMatch): + if j == len(patMatch): # j is the current pattern location break if not foundTheRightStar: if patMatch[j] in [self._STAR, self._UNDERSCORE]: #we got a star @@ -221,19 +223,25 @@ def star(self, starType, pattern, that, topic, index): # This is the star we care about. foundTheRightStar = True start = i - # Iterate through the rest of the string. - for k in range (i, len(words)): - # If the star is at the end of the pattern, - # we know exactly where it ends. - if j+1 == len (patMatch): - end = len (words) - break - # If the words have started matching the - # pattern again, the star has ended. - if patMatch[j+1] == words[k]: - end = k - 1 - i = k - break + # If the star is at the end of the pattern, + # match the rest of the words. + if j+1 == len (patMatch): + end = len (words) + else: + # If the next word in the pattern is another star, then + # we only want to match the one word + if patMatch[j+1] in [self._STAR, self._UNDERSCORE]: + end = j + else: + # Iterate through the rest of the words searching + # for the end of the star. + for k in range (i, len(words)): + # If the words have started matching the + # pattern again, the star has ended. + if patMatch[j+1] == words[k]: + end = k - 1 + i = k + break # If we just finished processing the star we cared # about, we exit the loop early. if foundTheRightStar: @@ -243,10 +251,13 @@ def star(self, starType, pattern, that, topic, index): # extract the star words from the original, unmutilated input. 
if foundTheRightStar: - #print( ' '.join(pattern.split()[start:end+1]) ) - if starType == 'star': return ' '.join(pattern.split()[start:end+1]) - elif starType == 'thatstar': return ' '.join(that.split()[start:end+1]) - elif starType == 'topicstar': return ' '.join(topic.split()[start:end+1]) + if starType == 'star': + match = ' '.join(pattern.split()[start:end+1]) + elif starType == 'thatstar': + match = ' '.join(that.split()[start:end+1]) + elif starType == 'topicstar': + match = ' '.join(topic.split()[start:end+1]) + return match else: return u"" def _match(self, words, thatWords, topicWords, root):