Currently, the error message shown in the screenshot below makes it hard for the user/admin to understand what went wrong in the back end. We want to send a graceful error message to the UI so that the user has a little more information about what went wrong.
When the LLM is unable to return a response, display a graceful error in the Chat UI.
These are the relevant logs from the RAG LLM image:
```
2025-01-17 00:02:11,359 - INFO - Calling chat completions for JSON model...
2025-01-17 00:02:11,381 - INFO - HTTP Request: POST http://llm-model-serve-serve-svc.default.svc.cluster.local:8000/v1/chat/completions "HTTP/1.1 400 Bad Request"
INFO: 192.168.2.79:47810 - "GET /answer/What%20issues%20have%20been%20faced%20by%20%3F HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 715, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 735, in app
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
    response = await f(request)
               ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
    raw_response = await run_endpoint_function(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 214, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/starlette/concurrency.py", line 39, in run_in_threadpool
    return await anyio.to_thread.run_sync(func, *args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 2461, in run_sync_in_worker_thread
    return await future
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 962, in run
    result = context.run(func, *args)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/serveragllm/serveragllm.py", line 239, in read_item
    answer = get_answer(question)
             ^^^^^^^^^^^^^^^^^^^^
  File "/serveragllm/serveragllm.py", line 114, in get_answer
    return get_answer_with_settings(
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/serveragllm/common.py", line 53, in get_answer_with_settings
    completions = client.chat.completions.create(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/openai/resources/chat/completions.py", line 859, in create
    return self._post(
           ^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/openai/_base_client.py", line 1283, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/openai/_base_client.py", line 960, in request
    return self._request(
           ^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/openai/_base_client.py", line 1064, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "This model's maximum context length is 4096 tokens. However, you requested 6379 tokens (6029 in the messages, 350 in the completion). Please reduce the length of the messages or completion.", 'type': 'BadRequestError', 'param': None, 'code': 400}
```
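One way to do this is to register a FastAPI exception handler that catches `openai.OpenAIError` (the base class of the `BadRequestError` above), logs the full details server-side, and returns a short JSON error the Chat UI can render instead of a bare 500. This is only a minimal sketch: the `app` object and handler here are assumptions for illustration, not the actual code in `serveragllm.py`.

```python
# Sketch only: the handler below is an assumed addition to the
# serveragllm FastAPI app, not existing code from this repo.
import logging

import openai
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

logger = logging.getLogger(__name__)
app = FastAPI()


@app.exception_handler(openai.OpenAIError)
async def handle_openai_error(request: Request, exc: openai.OpenAIError) -> JSONResponse:
    # Keep the full exception in the server logs for the admin...
    logger.error("LLM backend error while serving %s: %s", request.url.path, exc)

    # ...but send a short, user-friendly message to the Chat UI.
    if isinstance(exc, openai.BadRequestError):
        # The case seen in the logs above: prompt + completion exceed
        # the model's context window, so give the user an actionable hint.
        detail = ("The question and retrieved context are too long for the "
                  "model. Please try a shorter or more specific question.")
    else:
        detail = "The language model could not produce an answer. Please try again."

    # 502 signals an upstream (LLM) failure rather than a client error.
    return JSONResponse(status_code=502, content={"error": detail})
```

With a handler like this in place, the failing `GET /answer/...` request in the logs would return a 502 with an `{"error": ...}` body the Chat UI can display directly, rather than an unhandled 500 Internal Server Error.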