From b2fb21fd3a667083893e8e59c9dfc86620e5d78f Mon Sep 17 00:00:00 2001
From: George Murray
Date: Tue, 2 Jan 2024 15:40:01 -0800
Subject: [PATCH] GPT->LiteLLM

---
 README.md                |  14 ++---
 requirements.txt         |   1 +
 src/api.py               |  13 ++---
 steamship.json           | 110 ---------------------------------------
 test/test_integration.py |  27 +++++-----
 test/test_unit.py        |  52 +++++++++---------
 6 files changed, 54 insertions(+), 163 deletions(-)
 delete mode 100644 steamship.json

diff --git a/README.md b/README.md
index c1976ec..976117d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# OpenAI GPT-4 Plugin for Steamship
+# LiteLLM Plugin for Steamship (OpenAI-compatible)
 
-This plugin provides access to OpenAI's GPT-4 language model for text generation.
+This plugin provides text generation through LiteLLM, which exposes many language models behind an OpenAI-compatible interface.
 
 ## Usage
 
@@ -11,8 +11,8 @@ Use of this plugin is subject to OpenAI's [Terms of Use](https://openai.com/poli
 #### Basic
 
 ```python
-gpt4 = steamship.use_plugin("gpt-4")
-task = gpt4.generate(text=prompt)
+litellm = steamship.use_plugin("litellm")
+task = litellm.generate(text=prompt)
 task.wait()
 for block in task.output.blocks:
     print(block.text)
@@ -21,8 +21,8 @@
 #### With Runtime Parameters
 
 ```python
-gpt4 = steamship.use_plugin("gpt-4")
-task = gpt4.generate(text=prompt, options={"stop": ["6", "7"]})
+litellm = steamship.use_plugin("litellm")
+task = litellm.generate(text=prompt, options={"stop": ["6", "7"]})
 task.wait()
 for block in task.output.blocks:
     print(block.text)
@@ -30,7 +30,7 @@
 ## Cost
 
-[Pricing page](https://www.steamship.com/plugins/gpt-4?tab=Pricing)
+[Pricing page](https://www.steamship.com/plugins/litellm?tab=Pricing)
 
 ## Developing this Plugin
diff --git a/requirements.txt b/requirements.txt
index 2577ced..9f629c8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 openai==0.27.8
 tenacity==8.2.2
 steamship==2.17.31
+litellm
diff --git a/src/api.py b/src/api.py
index 0acfc8d..b5be7ed 100644
--- a/src/api.py
+++ b/src/api.py
@@ -50,12 +50,12 @@
 ]
 
 
-class GPT4Plugin(StreamingGenerator):
+class LiteLLMPlugin(StreamingGenerator):
     """
-    Plugin for generating text using OpenAI's GPT-4 model.
+    Plugin for generating text using LiteLLM-supported models.
     """
 
-    class GPT4PluginConfig(Config):
+    class LiteLLMPluginConfig(Config):
         openai_api_key: str = Field(
             "",
             description="An openAI API key to use. If left default, will use Steamship's API key.",
@@ -113,9 +113,9 @@ class GPT4PluginConfig(Config):
 
     @classmethod
     def config_cls(cls) -> Type[Config]:
-        return cls.GPT4PluginConfig
+        return cls.LiteLLMPluginConfig
 
-    config: GPT4PluginConfig
+    config: LiteLLMPluginConfig
 
     def __init__(
         self,
@@ -200,8 +200,9 @@ def generate_with_retry(
         output_blocks: List[Block],
     ) -> List[UsageReport]:
         """Call the API to generate the next section of text."""
+        # TODO Fix
         logging.info(
-            f"Making OpenAI GPT-4 chat completion call on behalf of user with id: {user}"
+            f"Making LiteLLM call on behalf of user with id: {user}"
         )
         options = options or {}
         stopwords = options.get("stop", None)
diff --git a/steamship.json b/steamship.json
deleted file mode 100644
index 57782fe..0000000
--- a/steamship.json
+++ /dev/null
@@ -1,110 +0,0 @@
-{
-  "type": "plugin",
-  "handle": "gpt-4",
-  "version": "0.1.3",
-  "description": "",
-  "author": "dave",
-  "entrypoint": "Unused",
-  "public": true,
-  "plugin": {
-    "isTrainable": false,
-    "transport": "jsonOverHttp",
-    "type": "generator",
-    "streaming": true
-  },
-  "build_config": {
-    "ignore": [
-      "tests",
-      "examples"
-    ]
-  },
-  "configTemplate": {
-    "openai_api_key": {
-      "type": "string",
-      "description": "An openAI API key to use. If left default, will use Steamship's API key.",
-      "default": ""
-    },
-    "max_tokens": {
-      "type": "number",
-      "description": "The maximum number of tokens to generate per request. Can be overridden in runtime options.",
-      "default": 256
-    },
-    "model": {
-      "type": "string",
-      "description": "The OpenAI model to use. Can be a pre-existing fine-tuned model.",
-      "default": "gpt-4-0613"
-    },
-    "temperature": {
-      "type": "number",
-      "description": "Controls randomness. Lower values produce higher likelihood / more predictable results; higher values produce more variety. Values between 0-1.",
-      "default": 0.4
-    },
-    "top_p": {
-      "type": "number",
-      "description": "Controls the nucleus sampling, where the model considers the results of the tokens with top_p probability mass. Values between 0-1.",
-      "default": 1
-    },
-    "presence_penalty": {
-      "type": "number",
-      "description": "Control how likely the model will reuse words. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. Number between -2.0 and 2.0.",
-      "default": 0
-    },
-    "frequency_penalty": {
-      "type": "number",
-      "description": "Control how likely the model will reuse words. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. Number between -2.0 and 2.0.",
-      "default": 0
-    },
-    "moderate_output": {
-      "type": "boolean",
-      "description": "Pass the generated output back through OpenAI's moderation endpoint and throw an exception if flagged.",
-      "default": true
-    },
-    "max_retries": {
-      "type": "number",
-      "description": "Maximum number of retries to make when generating.",
-      "default": 8
-    },
-    "request_timeout": {
-      "type": "number",
-      "description": "Timeout for requests to OpenAI completion API. 
Default is 600 seconds.", - "default": 600 - }, - "n": { - "type": "number", - "description": "How many completions to generate for each prompt.", - "default": 1 - }, - "default_role": { - "type": "string", - "description": "The default role to use for a block that does not have a Tag of kind='role'", - "default": "user" - }, - "default_system_prompt": { - "type": "string", - "description": "System prompt that will be prepended before every request", - "default": "" - } - }, - "steamshipRegistry": { - "tagline": "Complete prompts and chats with GPT-4", - "tagline2": null, - "usefulFor": null, - "videoUrl": null, - "githubUrl": null, - "demoUrl": null, - "blogUrl": null, - "jupyterUrl": null, - "authorGithub": "dkolas", - "authorName": "dkolas", - "authorEmail": "developers@steamship.com", - "authorTwitter": null, - "authorUrl": null, - "tags": [ - "GPT-4", - "Prompt Completion", - "LLM", - "GPT", - "OpenAI" - ] - } -} \ No newline at end of file diff --git a/test/test_integration.py b/test/test_integration.py index 7afadfb..5026184 100644 --- a/test/test_integration.py +++ b/test/test_integration.py @@ -1,20 +1,19 @@ """Test gpt-4 generation via integration tests.""" import json -from typing import Optional import pytest from steamship import Block, File, Steamship, MimeTypes, Tag from steamship.data import TagKind from steamship.data.tags.tag_constants import RoleTag, TagValueKey -GENERATOR_HANDLE = "gpt-4" +GENERATOR_HANDLE = "litellm" @pytest.mark.parametrize( "model", ["", "gpt-4-32k", "gpt-4-1106-preview"] ) def test_generator(model: str): with Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE, config={"model": model}) + litellm = steamship.use_plugin(GENERATOR_HANDLE, config={"model": model}) file = File.create( steamship, blocks=[ @@ -31,7 +30,7 @@ def test_generator(model: str): ], ) - generate_task = gpt4.generate(input_file_id=file.id) + generate_task = litellm.generate(input_file_id=file.id) generate_task.wait() output = generate_task.output assert len(output.blocks) == 1 @@ -42,14 +41,14 @@ def test_generator(model: str): def test_generator_without_role(): with Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE) + litellm = steamship.use_plugin(GENERATOR_HANDLE) file = File.create( steamship, blocks=[ Block(text="1 2 3 4"), ], ) - generate_task = gpt4.generate(input_file_id=file.id) + generate_task = litellm.generate(input_file_id=file.id) generate_task.wait() output = generate_task.output assert len(output.blocks) == 1 @@ -57,7 +56,7 @@ def test_generator_without_role(): def test_stopwords(): with Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE) + litellm = steamship.use_plugin(GENERATOR_HANDLE) file = File.create( steamship, blocks=[ @@ -73,7 +72,7 @@ def test_stopwords(): ), ], ) - generate_task = gpt4.generate( + generate_task = litellm.generate( input_file_id=file.id, options={"stop": ["6", "7"]} ) generate_task.wait() @@ -85,7 +84,7 @@ def test_stopwords(): def test_functions(): with Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE) + litellm = steamship.use_plugin(GENERATOR_HANDLE) file = File.create( steamship, blocks=[ @@ -101,7 +100,7 @@ def test_functions(): ), ], ) - generate_task = gpt4.generate( + generate_task = litellm.generate( input_file_id=file.id, options={ "functions": [ @@ -129,7 +128,7 @@ def test_functions(): def test_multimodal_functions_with_blocks(): with 
Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE) + litellm = steamship.use_plugin(GENERATOR_HANDLE) file = File.create( steamship, blocks=[ @@ -161,7 +160,7 @@ def test_multimodal_functions_with_blocks(): ), ], ) - generate_task = gpt4.generate( + generate_task = litellm.generate( input_file_id=file.id, options={ "functions": [ @@ -220,7 +219,7 @@ def test_multimodal_functions_with_blocks(): def test_functions_function_message(): with Steamship.temporary_workspace() as steamship: - gpt4 = steamship.use_plugin(GENERATOR_HANDLE) + litellm = steamship.use_plugin(GENERATOR_HANDLE) file = File.create( steamship, @@ -248,7 +247,7 @@ def test_functions_function_message(): ], ) - generate_task = gpt4.generate( + generate_task = litellm.generate( input_file_id=file.id, options={ "functions": [ diff --git a/test/test_unit.py b/test/test_unit.py index c892b9d..6b8877b 100644 --- a/test/test_unit.py +++ b/test/test_unit.py @@ -13,13 +13,13 @@ from steamship.plugin.outputs.plugin_output import UsageReport, OperationUnit from steamship.plugin.request import PluginRequest -from src.api import GPT4Plugin +from src.api import LiteLLMPlugin @pytest.mark.parametrize("model", ["", "gpt-4-32k"]) def test_generator(model: str): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={"n": 4, "model": model}) + litellm = LiteLLMPlugin(client=client, config={"n": 4, "model": model}) blocks = [ Block( @@ -34,7 +34,7 @@ def test_generator(model: str): ), ] - usage, new_blocks = run_test_streaming(client, gpt4, blocks, options={}) + usage, new_blocks = run_test_streaming(client, litellm, blocks, options={}) assert len(new_blocks) == 4 for block in new_blocks: assert block.text.strip().startswith("5 6 7 8") @@ -45,7 +45,7 @@ def test_generator(model: str): def test_stopwords(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={}) + litellm = LiteLLMPlugin(client=client, config={}) blocks = [ Block( @@ -61,7 +61,7 @@ def test_stopwords(): ] _, new_blocks = run_test_streaming( - client, gpt4, blocks=blocks, options={"stop": "6"} + client, litellm, blocks=blocks, options={"stop": "6"} ) assert len(new_blocks) == 1 assert new_blocks[0].text.strip() == "5" @@ -69,7 +69,7 @@ def test_stopwords(): def test_functions(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={}) + litellm = LiteLLMPlugin(client=client, config={}) blocks = [ Block( @@ -86,7 +86,7 @@ def test_functions(): _, new_blocks = run_test_streaming( client, - gpt4, + litellm, blocks=blocks, options={ "functions": [ @@ -112,7 +112,7 @@ def test_functions(): def test_functions_function_message(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={}) + litellm = LiteLLMPlugin(client=client, config={}) blocks = [ Block( @@ -138,7 +138,7 @@ def test_functions_function_message(): _, new_blocks = run_test_streaming( client, - gpt4, + litellm, blocks=blocks, options={ "functions": [ @@ -165,7 +165,7 @@ def test_functions_function_message(): def test_default_prompt(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin( + litellm = LiteLLMPlugin( client=client, config={ "openai_api_key": "", @@ -184,7 +184,7 @@ def test_default_prompt(): ] _, new_blocks = run_test_streaming( - client, gpt4, blocks=blocks, options={"stop": "6"} + client, litellm, blocks=blocks, options={"stop": "6"} ) assert len(new_blocks) == 1 assert new_blocks[0].text.strip() == 
"YIKES!" @@ -192,7 +192,7 @@ def test_default_prompt(): def test_flagged_prompt(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={"openai_api_key": ""}) + litellm = LiteLLMPlugin(client=client, config={"openai_api_key": ""}) blocks = [ Block( @@ -202,28 +202,28 @@ def test_flagged_prompt(): ), ] with pytest.raises(SteamshipError): - _, _ = run_test_streaming(client, gpt4, blocks=blocks, options={}) + _, _ = run_test_streaming(client, litellm, blocks=blocks, options={}) def test_invalid_model_for_billing(): with pytest.raises(SteamshipError) as e: - _ = GPT4Plugin( + _ = LiteLLMPlugin( config={"model": "a model that does not exist", "openai_api_key": ""} ) assert "This plugin cannot be used with model" in str(e) def test_cant_override_model(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin( + litellm = LiteLLMPlugin( config={} ) with pytest.raises(SteamshipError) as e: - _, _ = run_test_streaming(client, gpt4, blocks=[Block(text="yo")], options={"model":"gpt-3.5-turbo"}) + _, _ = run_test_streaming(client, litellm, blocks=[Block(text="yo")], options={"model":"gpt-3.5-turbo"}) assert "Model may not be overridden in options" in str(e) def test_streaming_generation(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={}) + litellm = LiteLLMPlugin(client=client, config={}) blocks = [ Block( @@ -234,7 +234,7 @@ def test_streaming_generation(): ] result_usage, result_blocks = run_test_streaming( - client, gpt4, blocks=blocks, options={"n": 3} + client, litellm, blocks=blocks, options={"n": 3} ) result_texts = [block.text for block in result_blocks] @@ -253,7 +253,7 @@ def test_streaming_generation(): def test_streaming_generation_with_moderation(): with Steamship.temporary_workspace() as client: - gpt4 = GPT4Plugin(client=client, config={}) + litellm = LiteLLMPlugin(client=client, config={}) file = File.create(client, blocks=[ Block( @@ -263,7 +263,7 @@ def test_streaming_generation_with_moderation(): ), ]) - blocks_to_allocate = gpt4.determine_output_block_types( + blocks_to_allocate = litellm.determine_output_block_types( PluginRequest(data=RawBlockAndTagPluginInput(blocks=file.blocks, options={"n": 1})) ) @@ -284,7 +284,7 @@ def test_streaming_generation_with_moderation(): assert file.blocks[1].stream_state == "started" with pytest.raises(SteamshipError): - gpt4.run( + litellm.run( PluginRequest( data=RawBlockAndTagPluginInputWithPreallocatedBlocks( blocks=file.blocks, options={"n": 1}, output_blocks=output_blocks @@ -303,7 +303,7 @@ def test_streaming_generation_with_moderation(): raw_text = file.blocks[1].raw() def run_test_streaming( - client: Steamship, plugin: GPT4Plugin, blocks: [Block], options: dict + client: Steamship, plugin: LiteLLMPlugin, blocks: [Block], options: dict ) -> ([UsageReport], [Block]): blocks_to_allocate = plugin.determine_output_block_types( PluginRequest(data=RawBlockAndTagPluginInput(blocks=blocks, options=options)) @@ -334,7 +334,7 @@ def run_test_streaming( def test_multimodal_functions_with_blocks(): with Steamship.temporary_workspace() as steamship: - gpt4 = GPT4Plugin(client=steamship, config={}) + litellm = LiteLLMPlugin(client=steamship, config={}) blocks = [ Block( text="You are a helpful AI assistant.", @@ -376,7 +376,7 @@ def test_multimodal_functions_with_blocks(): _, new_blocks = run_test_streaming( steamship, - gpt4, + litellm, blocks=blocks, options={ "functions": [ @@ -437,7 +437,7 @@ def fetch_result_text(block: Block) -> str: def 
test_prepare_messages(): - gpt4 = GPT4Plugin( + litellm = LiteLLMPlugin( config={}, ) @@ -473,7 +473,7 @@ def test_prepare_messages(): ) ] - messages = gpt4.prepare_messages(blocks=blocks) + messages = litellm.prepare_messages(blocks=blocks) expected_messages = [ {'role': 'system', 'content': 'You are a helpful AI assistant.\n\nNOTE: Some functions return images, video, and audio files. These multimedia files will be represented in messages as\nUUIDs for Steamship Blocks. When responding directly to a user, you SHOULD print the Steamship Blocks for the images,\nvideo, or audio as follows: `Block(UUID for the block)`.\n\nExample response for a request that generated an image:\nHere is the image you requested: Block(288A2CA1-4753-4298-9716-53C1E42B726B).\n\nOnly use the functions you have been provided with.\n'},
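
Note: the hunks above rename the plugin classes and handle and add `litellm` to requirements.txt, but the updated completion call itself is not among the excerpted hunks. The sketch below is illustrative only and is not taken from this patch: it shows how a streaming call through LiteLLM's OpenAI-compatible `completion` API could look. The helper name `stream_completion` and its parameter list are assumptions, and it expects the relevant provider key (for example `OPENAI_API_KEY`) to be set in the environment.

```python
# Hypothetical sketch (not from this patch): streaming text generation via LiteLLM.
# litellm.completion mirrors the OpenAI chat-completions interface, so OpenAI-style
# message dicts (like those built by the plugin's prepare_messages) pass through unchanged.
from typing import Iterator, List, Optional

import litellm


def stream_completion(
    model: str,
    messages: List[dict],
    max_tokens: int = 256,
    temperature: float = 0.4,
    stop: Optional[List[str]] = None,
) -> Iterator[str]:
    """Yield text deltas as they arrive, mirroring the plugin's streaming output blocks."""
    response = litellm.completion(
        model=model,            # e.g. "gpt-4-0613", or any other LiteLLM-supported model string
        messages=messages,      # OpenAI-style [{"role": "user", "content": "1 2 3 4"}, ...]
        max_tokens=max_tokens,
        temperature=temperature,
        stop=stop,
        stream=True,            # emit chunks instead of waiting for the full completion
    )
    for chunk in response:
        delta = chunk.choices[0].delta
        if delta.content:
            yield delta.content


if __name__ == "__main__":
    prompt = [{"role": "user", "content": "1 2 3 4"}]
    for piece in stream_completion("gpt-4-0613", prompt):
        print(piece, end="", flush=True)
```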