@@ -141,15 +141,67 @@ def __init__(
141141 re .DOTALL | re .MULTILINE
142142 )
143143
144- # aliases have to be a word, so make a regular expression
145- # that matches the first word in the line. This regex has two
146- # parts, the first parenthesis enclosed group matches one
147- # or more non-whitespace characters (which may be preceeded
148- # by whitespace) and the second group matches either a whitespace
149- # character or the end of the string. We use \A and \Z to ensure
150- # we always match the beginning and end of a string that may have
151- # multiple lines
152- self .command_pattern = re .compile (r'\A\s*(\S+)(\s|\Z)+' )
144+ # commands have to be a word, so make a regular expression
145+ # that matches the first word in the line. This regex has three
146+ # parts:
147+ # - the '\A\s*' matches the beginning of the string (even
148+ # if it contains multiple lines) and gobbles up any leading
149+ # whitespace
150+ # - the first parenthesis enclosed group matches zero
151+ # or more non-whitespace characters with a non-greedy match
152+ # (that's what the '*?' part does). The non-greedy match
153+ # ensures that this first group doesn't include anything
154+ # matched by the second group
155+ # - the second parenthesis group must be dynamically created
156+ # because it needs to match either whitespace, a quote, something
157+ # in REDIRECTION_CHARS, one of the terminators, or the end of
158+ # the string (\Z matches the end of the string even if it
159+ # contains multiple lines)
160+ #
161+ invalid_command_chars = []
162+ invalid_command_chars .extend (constants .QUOTES )
163+ invalid_command_chars .extend (constants .REDIRECTION_CHARS )
164+ invalid_command_chars .extend (terminators )
165+ # escape each item so it will for sure get treated as a literal
166+ second_group_items = [re .escape (x ) for x in invalid_command_chars ]
167+ # add the whitespace and end of string, not escaped because they
168+ # are not literals
169+ second_group_items .extend ([r'\s' , r'\Z' ])
170+ # join them up with a pipe
171+ second_group = '|' .join (second_group_items )
172+ # build the regular expression
173+ expr = r'\A\s*(\S*?)({})' .format (second_group )
174+ self ._command_pattern = re .compile (expr )
175+
def is_valid_command(self, word: str) -> Tuple[bool, str]:
    """Determine whether a word is a valid command name.

    A valid command name is non-empty and contains no whitespace,
    quotes, redirection characters, or terminators.

    :param word: the candidate command (or alias) name to check
    :return: a tuple ``(valid, errmsg)``. If ``word`` is valid, ``valid``
             is True and ``errmsg`` is None. Otherwise ``valid`` is False
             and ``errmsg`` is a comma separated string describing what
             can not appear in a command. An empty ``word`` is reported
             as invalid with that same string.

    The returned string is suitable for inclusion in an error message of
    your choice:

        valid, invalidchars = statement_parser.is_valid_command('>')
        if not valid:
            errmsg = "Aliases can not contain: {}".format(invalidchars)
    """
    # The command pattern can match an empty string with an empty first
    # group (via its end-of-string alternative), so an empty word has to be
    # rejected explicitly instead of being compared against group(1).
    match = self._command_pattern.search(word) if word else None

    # The word is valid only when the whole of it is the command group,
    # i.e. nothing in it was consumed as a separator or left over.
    if match and word == match.group(1):
        return True, None

    # Build the description of disallowed content only when it is needed.
    errchars = []
    errchars.extend(constants.REDIRECTION_CHARS)
    errchars.extend(self.terminators)
    errmsg = 'whitespace, quotes, ' + ', '.join([shlex.quote(x) for x in errchars])
    return False, errmsg
153205
154206 def tokenize (self , line : str ) -> List [str ]:
155207 """Lex a string into a list of tokens.
@@ -324,16 +376,24 @@ def parse_command_only(self, rawinput: str) -> Statement:
324376
325377 command = None
326378 args = None
327- match = self .command_pattern .search (line )
379+ match = self ._command_pattern .search (line )
328380 if match :
329381 # we got a match, extract the command
330382 command = match .group (1 )
331- # the command_pattern regex is designed to match the spaces
383+ # the match could be an empty string, if so, turn it into none
384+ if not command :
385+ command = None
386+ # the _command_pattern regex is designed to match the spaces
332387 # between command and args with a second match group. Using
333388 # the end of the second match group ensures that args has
334389 # no leading whitespace. The rstrip() makes sure there is
335390 # no trailing whitespace
336391 args = line [match .end (2 ):].rstrip ()
392+ # if the command is none that means the input was either empty
393+ # or something weird like '>'. args should be None if we couldn't
394+ # parse a command
395+ if not command or not args :
396+ args = None
337397
338398 # build the statement
339399 # string representation of args must be an empty string instead of
@@ -355,11 +415,11 @@ def _expand(self, line: str) -> str:
355415 for cur_alias in tmp_aliases :
356416 keep_expanding = False
357417 # apply our regex to line
358- match = self .command_pattern .search (line )
418+ match = self ._command_pattern .search (line )
359419 if match :
360420 # we got a match, extract the command
361421 command = match .group (1 )
362- if command == cur_alias :
422+ if command and command == cur_alias :
363423 # rebuild line with the expanded alias
364424 line = self .aliases [cur_alias ] + match .group (2 ) + line [match .end (2 ):]
365425 tmp_aliases .remove (cur_alias )
0 commit comments