diff --git a/ai_worker/main.py b/ai_worker/main.py index b65171d..ab6d154 100644 --- a/ai_worker/main.py +++ b/ai_worker/main.py @@ -262,9 +262,9 @@ async def guess_layers(self, model_path): return max(0, est_layers - self.conf.layer_offset) def clear_llama_model(self): - if llama_cpp.server.app.llama: + if llama_cpp.server.app._llama_proxy: # critical... must del this before creating a new app - llama_cpp.server.app.llama = None + llama_cpp.server.app._llama_proxy = None self.llama = None self.llama_cli = None diff --git a/poetry.lock b/poetry.lock index c3f9353..4abd8df 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -1101,17 +1101,18 @@ files = [ [[package]] name = "llama-cpp-python" -version = "0.2.7" +version = "0.2.53" description = "Python bindings for the llama.cpp library" optional = false python-versions = ">=3.8" files = [ - {file = "llama_cpp_python-0.2.7.tar.gz", hash = "sha256:2267d3e90bf461ee581dc78332007ea15d9b40e79471850455458b399e3f1887"}, + {file = "llama_cpp_python-0.2.53.tar.gz", hash = "sha256:f7ff8eda538ca6c80521a8bbf80d3ef4527ecb28f6d08fa9b3bb1f0cfc3b684e"}, ] [package.dependencies] diskcache = ">=5.6.1" fastapi = {version = ">=0.100.0", optional = true, markers = "extra == \"server\""} +jinja2 = ">=2.11.3" numpy = ">=1.20.0" pydantic-settings = {version = ">=2.0.1", optional = true, markers = "extra == \"server\""} sse-starlette = {version = ">=1.6.1", optional = true, markers = "extra == \"server\""} @@ -1123,7 +1124,7 @@ uvicorn = {version = ">=0.22.0", optional = true, markers = "extra == \"server\" all = ["llama_cpp_python[dev,server,test]"] dev = ["black (>=23.3.0)", "httpx (>=0.24.1)", "mkdocs (>=1.4.3)", "mkdocs-material (>=9.1.18)", "mkdocstrings[python] (>=0.22.0)", "pytest (>=7.4.0)", "twine (>=4.0.2)"] server = ["fastapi (>=0.100.0)", "pydantic-settings (>=2.0.1)", "sse-starlette (>=1.6.1)", "starlette-context (>=0.3.6,<0.4)", "uvicorn (>=0.22.0)"] -test = ["httpx (>=0.24.1)", "pytest (>=7.4.0)"] +test = ["httpx (>=0.24.1)", "pytest (>=7.4.0)", "scipy (>=1.10)"] [[package]] name = "macholib" @@ -2811,17 +2812,22 @@ files = [ [[package]] name = "sse-starlette" -version = "1.6.5" -description = "\"SSE plugin for Starlette\"" +version = "2.0.0" +description = "SSE plugin for Starlette" optional = false python-versions = ">=3.8" files = [ - {file = "sse-starlette-1.6.5.tar.gz", hash = "sha256:819f2c421fb37067380fe3dcaba246c476b02651b7bb7601099a378ad802a0ac"}, - {file = "sse_starlette-1.6.5-py3-none-any.whl", hash = "sha256:68b6b7eb49be0c72a2af80a055994c13afcaa4761b29226beb208f954c25a642"}, + {file = "sse_starlette-2.0.0-py3-none-any.whl", hash = "sha256:c4dd134302cb9708d47cae23c365fe0a089aa2a875d2f887ac80f235a9ee5744"}, + {file = "sse_starlette-2.0.0.tar.gz", hash = "sha256:0c43cc43aca4884c88c8416b65777c4de874cc4773e6458d3579c0a353dc2fb7"}, ] [package.dependencies] +anyio = "*" starlette = "*" +uvicorn = "*" + +[package.extras] +examples = ["fastapi"] [[package]] name = "starlette" @@ -3139,13 +3145,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.24.0.post1" +version = "0.27.1" description = "The lightning-fast ASGI server." optional = false python-versions = ">=3.8" files = [ - {file = "uvicorn-0.24.0.post1-py3-none-any.whl", hash = "sha256:7c84fea70c619d4a710153482c0d230929af7bcf76c7bfa6de151f0a3a80121e"}, - {file = "uvicorn-0.24.0.post1.tar.gz", hash = "sha256:09c8e5a79dc466bdf28dead50093957db184de356fcdc48697bad3bde4c2588e"}, + {file = "uvicorn-0.27.1-py3-none-any.whl", hash = "sha256:5c89da2f3895767472a35556e539fd59f7edbe9b1e9c0e1c99eebeadc61838e4"}, + {file = "uvicorn-0.27.1.tar.gz", hash = "sha256:3d9a267296243532db80c83a959a3400502165ade2c1338dea4e67915fd4745a"}, ] [package.dependencies] @@ -3457,4 +3463,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "bf5bc222fc88579f906ace5295144bd3dfe352c562e2968214a63c8f7ce3e69d" +content-hash = "35574759fca1399995733ee0261dc71a138bc9137e54a7c9c16de1b7585d7ab5" diff --git a/pyproject.toml b/pyproject.toml index 316d8e6..bcc4973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ packages = [{include = "ai_worker"}, {include = "gguf_loader"}] [tool.poetry.dependencies] python = ">=3.10,<3.12" -llama-cpp-python = {extras = ["server"], version = "0.2.7"} +llama-cpp-python = {version = "0.2.53", extras = ["server"]} pydantic-settings = "^2.0.3" fastapi = "^0.103.1" websockets = "^11.0.3"