From 6a555bfa60f462390bba80df5348661fe9ba0e67 Mon Sep 17 00:00:00 2001
From: Robert Borg
Date: Tue, 11 Nov 2025 14:26:43 +0100
Subject: [PATCH] implements inceptron provider

---
 packages/hub/src/types/api/api-collection.ts  |  1 +
 packages/inference/README.md                  |  2 +
 .../inference/src/lib/getProviderHelper.ts    |  5 ++
 packages/inference/src/providers/consts.ts    |  1 +
 packages/inference/src/providers/inceptron.ts | 44 +++++++++++++
 packages/inference/src/types.ts               |  2 +
 .../inference/test/InferenceClient.spec.ts    | 61 +++++++++++++++++++
 7 files changed, 116 insertions(+)
 create mode 100644 packages/inference/src/providers/inceptron.ts

diff --git a/packages/hub/src/types/api/api-collection.ts b/packages/hub/src/types/api/api-collection.ts
index 94b214e220..dd4f62072b 100644
--- a/packages/hub/src/types/api/api-collection.ts
+++ b/packages/hub/src/types/api/api-collection.ts
@@ -57,6 +57,7 @@ interface ApiCollectionItemModel extends ApiCollectionItemBase {
 		| "groq"
 		| "hf-inference"
 		| "hyperbolic"
+		| "inceptron"
 		| "nebius"
 		| "novita"
 		| "nscale"
diff --git a/packages/inference/README.md b/packages/inference/README.md
index 664c224583..112458278b 100644
--- a/packages/inference/README.md
+++ b/packages/inference/README.md
@@ -52,6 +52,7 @@ Currently, we support the following providers:
 - [Fireworks AI](https://fireworks.ai)
 - [HF Inference](https://huggingface.co/docs/inference-providers/providers/hf-inference)
 - [Hyperbolic](https://hyperbolic.xyz)
+- [Inceptron](https://inceptron.io)
 - [Nebius](https://studio.nebius.ai)
 - [Novita](https://novita.ai)
 - [Nscale](https://nscale.com)
@@ -93,6 +94,7 @@ Only a subset of models are supported when requesting third-party providers. You
 - [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
 - [HF Inference supported models](https://huggingface.co/api/partners/hf-inference/models)
 - [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
+- [Inceptron supported models](https://huggingface.co/api/partners/inceptron/models)
 - [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
 - [Nscale supported models](https://huggingface.co/api/partners/nscale/models)
 - [OVHcloud supported models](https://huggingface.co/api/partners/ovhcloud/models)
diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts
index e18a258a0f..0992940e7a 100644
--- a/packages/inference/src/lib/getProviderHelper.ts
+++ b/packages/inference/src/lib/getProviderHelper.ts
@@ -9,6 +9,7 @@ import * as Fireworks from "../providers/fireworks-ai.js";
 import * as Groq from "../providers/groq.js";
 import * as HFInference from "../providers/hf-inference.js";
 import * as Hyperbolic from "../providers/hyperbolic.js";
+import * as Inceptron from "../providers/inceptron.js";
 import * as Nebius from "../providers/nebius.js";
 import * as Novita from "../providers/novita.js";
 import * as Nscale from "../providers/nscale.js";
@@ -128,6 +129,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
 		conversational: new Hyperbolic.HyperbolicConversationalTask(),
 		"text-generation": new Hyperbolic.HyperbolicTextGenerationTask(),
 	},
+	inceptron: {
+		conversational: new Inceptron.InceptronConversationalTask(),
+		"text-generation": new Inceptron.InceptronTextGenerationTask(),
+	},
 	nebius: {
 		"text-to-image": new Nebius.NebiusTextToImageTask(),
 		conversational: new Nebius.NebiusConversationalTask(),
diff --git a/packages/inference/src/providers/consts.ts b/packages/inference/src/providers/consts.ts
--- a/packages/inference/src/providers/consts.ts
+++ b/packages/inference/src/providers/consts.ts
@@ -20,6 +20,7 @@ export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
 	groq: {},
 	"hf-inference": {},
 	hyperbolic: {},
+	inceptron: {},
 	nebius: {},
 	novita: {},
 	nscale: {},
diff --git a/packages/inference/src/providers/inceptron.ts b/packages/inference/src/providers/inceptron.ts
new file mode 100644
--- /dev/null
+++ b/packages/inference/src/providers/inceptron.ts
@@ -0,0 +1,44 @@
+import type { TextGenerationOutput } from "@huggingface/tasks";
+import { InferenceClientProviderOutputError } from "../errors.js";
+import type { BodyParams } from "../types.js";
+import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js";
+
+const INCEPTRON_API_BASE_URL = "https://api.inceptron.io";
+
+export class InceptronConversationalTask extends BaseConversationalTask {
+	constructor() {
+		super("inceptron", INCEPTRON_API_BASE_URL);
+	}
+}
+
+export class InceptronTextGenerationTask extends BaseTextGenerationTask {
+	constructor() {
+		super("inceptron", INCEPTRON_API_BASE_URL);
+	}
+
+	override preparePayload(params: BodyParams): Record<string, unknown> {
+		const payload = super.preparePayload(params);
+		if (params.args.inputs) {
+			payload.prompt = params.args.inputs;
+			delete payload.inputs;
+		}
+		return payload;
+	}
+
+	override async getResponse(response: unknown): Promise<TextGenerationOutput> {
+		if (
+			typeof response === "object" &&
+			response &&
+			"choices" in response &&
+			Array.isArray(response.choices) &&
+			response.choices.length > 0 &&
+			"text" in response.choices[0]
+		) {
+			return {
+				generated_text: response.choices[0].text,
+			};
+		}
+
+		throw new InferenceClientProviderOutputError("Received malformed response from Inceptron text generation API");
+	}
+}
diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts
index e4165914d6..7aec2dae48 100644
--- a/packages/inference/src/types.ts
+++ b/packages/inference/src/types.ts
@@ -56,6 +56,7 @@ export const INFERENCE_PROVIDERS = [
 	"groq",
 	"hf-inference",
 	"hyperbolic",
+	"inceptron",
 	"nebius",
 	"novita",
 	"nscale",
@@ -93,6 +94,7 @@ export const PROVIDERS_HUB_ORGS: Record<InferenceProvider, string> = {
 	groq: "groq",
 	"hf-inference": "hf-inference",
 	hyperbolic: "Hyperbolic",
+	inceptron: "inceptron",
 	nebius: "nebius",
 	novita: "novita",
 	nscale: "nscale",
diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts
index 00f9d9953f..5253f5fb3d 100644
--- a/packages/inference/test/InferenceClient.spec.ts
+++ b/packages/inference/test/InferenceClient.spec.ts
@@ -1411,6 +1411,67 @@ describe.skip("InferenceClient", () => {
 		TIMEOUT
 	);
 
+	describe.concurrent(
+		"Inceptron",
+		() => {
+			const client = new InferenceClient(env.HF_INCEPTRON_KEY ?? "dummy");
+
+			HARDCODED_MODEL_INFERENCE_MAPPING.inceptron = {
+				"meta-llama/Llama-3.3-70B-Instruct": {
+					provider: "inceptron",
+					hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
+					providerId: "meta-llama/Llama-3.3-70B-Instruct",
+					status: "live",
+					task: "conversational",
+				},
+			};
+
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				});
+				if (res.choices && res.choices.length > 0) {
+					const completion = res.choices[0].message?.content;
+					expect(completion).toMatch(/(two|2)/i);
+				}
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+				let out = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						out += chunk.choices[0].delta.content;
+					}
+				}
+				expect(out).toMatch(/(two|2)/i);
+			});
+
+			it("textGeneration", async () => {
+				const res = await client.textGeneration({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					inputs: "Once upon a time,",
+					parameters: {
+						temperature: 0,
+						max_new_tokens: 20,
+					},
+				});
+
+				expect(res).toHaveProperty("generated_text");
+				expect(typeof res.generated_text).toBe("string");
+				expect(res.generated_text.length).toBeGreaterThan(0);
+			});
+		},
+		TIMEOUT
+	);
+
 	describe.concurrent(
 		"Nebius",
 		() => {
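
For reference, a minimal usage sketch of the provider this patch adds, not part of the diff itself. It assumes `meta-llama/Llama-3.3-70B-Instruct` is live on Inceptron (the same model the tests use) and that a token is available in an `HF_TOKEN` environment variable (a hypothetical choice):

```ts
import { InferenceClient } from "@huggingface/inference";

// Assumption: HF_TOKEN holds a Hugging Face token with inference access.
const client = new InferenceClient(process.env.HF_TOKEN);

const res = await client.chatCompletion({
	model: "meta-llama/Llama-3.3-70B-Instruct",
	provider: "inceptron", // routes the request through the new Inceptron provider
	messages: [{ role: "user", content: "One plus one equals" }],
});

console.log(res.choices[0].message.content);
```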