diff --git a/cecli/args.py b/cecli/args.py index 1ba9de0cd7f..0395d987a33 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -241,6 +241,12 @@ def get_parser(default_config_files, git_root): " If unspecified, defaults to the model's max_chat_history_tokens." ), ) + group.add_argument( + "--retries", + metavar="RETRIES_JSON", + help="Specify LLM retry configuration as a JSON string", + default=None, + ) ####### group = parser.add_argument_group("Customization Settings") @@ -1115,4 +1121,4 @@ def main(): if __name__ == "__main__": status = main() - sys.exit(status) + sys.exit(status) \ No newline at end of file diff --git a/cecli/main.py b/cecli/main.py index 624af74cb4a..b49e5583c48 100644 --- a/cecli/main.py +++ b/cecli/main.py @@ -556,6 +556,8 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re args.mcp_servers = convert_yaml_to_json_string(args.mcp_servers) if hasattr(args, "custom") and args.custom is not None: args.custom = convert_yaml_to_json_string(args.custom) + if hasattr(args, "retries") and args.retries is not None: + args.retries = convert_yaml_to_json_string(args.retries) if args.debug: global log_file os.makedirs(".cecli/logs/", exist_ok=True) @@ -861,6 +863,7 @@ def apply_model_overrides(model_name): verbose=args.verbose, io=io, override_kwargs=main_model_overrides, + retries=args.retries, ) if args.copy_paste and main_model.copy_paste_transport == "api": main_model.enable_copy_paste_mode() @@ -1285,4 +1288,4 @@ async def graceful_exit(coder=None, exit_code=0): if __name__ == "__main__": status = main() - sys.exit(status) + sys.exit(status) \ No newline at end of file diff --git a/cecli/models.py b/cecli/models.py index 42410231d8b..ea8a6c0f8e4 100644 --- a/cecli/models.py +++ b/cecli/models.py @@ -309,6 +309,7 @@ def __init__( verbose=False, io=None, override_kwargs=None, + retries=None, ): provided_model = model or "" if isinstance(provided_model, Model): @@ -327,6 +328,7 @@ def __init__( model = provided_model model 
= MODEL_ALIASES.get(model, model) self.name = model + self.retries = retries self.max_chat_history_tokens = 1024 self.weak_model = None self.editor_model = None @@ -950,15 +952,64 @@ async def send_completion( "Editor-Version": f"cecli/{__version__}", "Copilot-Integration-Id": "vscode-chat", } - try: - res = await litellm.acompletion(**kwargs) - except Exception as err: - print(f"LiteLLM API Error: {str(err)}") - res = self.model_error_response() - if self.verbose: - print(f"LiteLLM API Error: {str(err)}") - raise - return hash_object, res + if self.retries: + try: + retries_config = json.loads(self.retries) + if "timeout" in retries_config: + self.request_timeout = retries_config["timeout"] + if "backoff-factor" in retries_config: + self.retry_backoff_factor = float(retries_config["backoff-factor"]) + if "retry-on-unavailable" in retries_config: + self.retry_on_unavailable = bool(retries_config["retry-on-unavailable"]) + if "retry-timeout" in retries_config: + self.retry_timeout = float(retries_config["retry-timeout"]) + except (json.JSONDecodeError, TypeError, ValueError): + pass + + kwargs["timeout"] = self.request_timeout + + litellm_ex = LiteLLMExceptions() + retry_delay = 0.125 + + while True: + try: + if self.verbose: + dump(kwargs) + res = await litellm.acompletion(**kwargs) + return hash_object, res + except litellm.ContextWindowExceededError as err: + raise err + except litellm_ex.exceptions_tuple() as err: + ex_info = litellm_ex.get_ex_info(err) + should_retry = ex_info.retry + if ex_info.name == "ServiceUnavailableError": + should_retry = should_retry or self.retry_on_unavailable + + if should_retry: + retry_delay *= self.retry_backoff_factor + if retry_delay > self.retry_timeout: + should_retry = False + + # Check for non-retryable RateLimitError within ServiceUnavailableError + if ( + isinstance(err, litellm.ServiceUnavailableError) + and "RateLimitError" in str(err) + and 'status_code: 429, message: "Resource has been exhausted' in str(err) + ): + 
should_retry = False + + if not should_retry: + print(f"LiteLLM API Error: {str(err)}") + if ex_info.description: + print(ex_info.description) + if stream: + return hash_object, self.model_error_response_stream() + else: + return hash_object, self.model_error_response() + + print(f"Retrying in {retry_delay:.1f} seconds...") + await asyncio.sleep(retry_delay) + continue async def simple_send_with_retries(self, messages, max_tokens=None): from cecli.exceptions import LiteLLMExceptions @@ -997,21 +1048,22 @@ async def simple_send_with_retries(self, messages, max_tokens=None): except AttributeError: return None - async def model_error_response(self): - for i in range(1): - await asyncio.sleep(0.1) - yield litellm.ModelResponse( - choices=[ - litellm.Choices( - finish_reason="stop", - index=0, - message=litellm.Message( - content="Model API Response Error. Please retry the previous request" - ), - ) - ], - model=self.name, - ) + def model_error_response(self): + return litellm.ModelResponse( + choices=[ + litellm.Choices( + finish_reason="stop", + index=0, + message=litellm.Message( + content="Model API Response Error. Please retry the previous request" + ), + ) + ], + model=self.name, + ) + + async def model_error_response_stream(self): + yield self.model_error_response() def register_models(model_settings_fnames): @@ -1252,4 +1304,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md index 5a12fcb0eb9..f99b66d6442 100644 --- a/cecli/website/docs/config.md +++ b/cecli/website/docs/config.md @@ -40,5 +40,35 @@ Using an `.env` file: CECLI_DARK_MODE=true ``` -{% include keys.md %} +## Retries + +Aider can be configured to retry failed API calls. +This is useful for handling intermittent network issues or other transient errors. 
+The `retries` option is a JSON object that can be configured with the following keys: + +- `retry-timeout`: The maximum delay in seconds between retries; once the backoff delay would exceed it, retrying stops. +- `backoff-factor`: The factor by which the delay is multiplied before each retry. +- `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors. + +Example usage in `.aider.conf.yml`: + +```yaml +retries: + retry-timeout: 30 + backoff-factor: 1.50 + retry-on-unavailable: true +``` + +This can also be set with the `--retries` command line switch, passing a JSON string: + +``` +$ cecli --retries '{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}' +``` + +Or by setting the `CECLI_RETRIES` environment variable: + +``` +export CECLI_RETRIES='{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}' +``` +{% include keys.md %}