Skip to content

Commit a5c3ea5

Browse files
committed
implements inceptron provider
1 parent a757f69 commit a5c3ea5

File tree

7 files changed

+116
-0
lines changed

7 files changed

+116
-0
lines changed

packages/hub/src/types/api/api-collection.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ interface ApiCollectionItemModel extends ApiCollectionItemBase {
5757
| "groq"
5858
| "hf-inference"
5959
| "hyperbolic"
60+
| "inceptron"
6061
| "nebius"
6162
| "novita"
6263
| "nscale"

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ Currently, we support the following providers:
5252
- [Fireworks AI](https://fireworks.ai)
5353
- [HF Inference](https://huggingface.co/docs/inference-providers/providers/hf-inference)
5454
- [Hyperbolic](https://hyperbolic.xyz)
55+
- [Inceptron](https://inceptron.io)
5556
- [Nebius](https://studio.nebius.ai)
5657
- [Novita](https://novita.ai)
5758
- [Nscale](https://nscale.com)
@@ -93,6 +94,7 @@ Only a subset of models are supported when requesting third-party providers. You
9394
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
9495
- [HF Inference supported models](https://huggingface.co/api/partners/hf-inference/models)
9596
- [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
97+
- [Inceptron supported models](https://huggingface.co/api/partners/inceptron/models)
9698
- [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
9799
- [Nscale supported models](https://huggingface.co/api/partners/nscale/models)
98100
- [OVHcloud supported models](https://huggingface.co/api/partners/ovhcloud/models)

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import * as Fireworks from "../providers/fireworks-ai.js";
99
import * as Groq from "../providers/groq.js";
1010
import * as HFInference from "../providers/hf-inference.js";
1111
import * as Hyperbolic from "../providers/hyperbolic.js";
12+
import * as Inceptron from "../providers/inceptron.js";
1213
import * as Nebius from "../providers/nebius.js";
1314
import * as Novita from "../providers/novita.js";
1415
import * as Nscale from "../providers/nscale.js";
@@ -128,6 +129,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
128129
conversational: new Hyperbolic.HyperbolicConversationalTask(),
129130
"text-generation": new Hyperbolic.HyperbolicTextGenerationTask(),
130131
},
132+
inceptron: {
133+
conversational: new Inceptron.InceptronConversationalTask(),
134+
"text-generation": new Inceptron.InceptronTextGenerationTask(),
135+
},
131136
nebius: {
132137
"text-to-image": new Nebius.NebiusTextToImageTask(),
133138
conversational: new Nebius.NebiusConversationalTask(),

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
2929
groq: {},
3030
"hf-inference": {},
3131
hyperbolic: {},
32+
inceptron: {},
3233
nebius: {},
3334
novita: {},
3435
nscale: {},
packages/inference/src/providers/inceptron.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import type { TextGenerationOutput } from "@huggingface/tasks";
2+
import type { BodyParams } from "../types.js";
3+
import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js";
4+
import { InferenceClientProviderOutputError } from "../errors.js";
5+
// Base URL of the Inceptron inference API (OpenAI-compatible endpoint,
// judging by the "openrouter" host and the `prompt`/`choices[].text`
// shapes used below).
const INCEPTRON_API_BASE_URL = "https://openrouter.inceptron.io";
8+
/**
 * Conversational (chat-completion) task helper for the Inceptron provider.
 *
 * All request/response handling is inherited from `BaseConversationalTask`;
 * this subclass only binds the provider name and its API base URL.
 */
export class InceptronConversationalTask extends BaseConversationalTask {
	constructor() {
		super("inceptron", INCEPTRON_API_BASE_URL);
	}
}
13+
14+
export class InceptronTextGenerationTask extends BaseTextGenerationTask {
15+
constructor() {
16+
super("inceptron", INCEPTRON_API_BASE_URL);
17+
}
18+
19+
override preparePayload(params: BodyParams): Record<string, unknown> {
20+
const payload = super.preparePayload(params);
21+
if (params.args.inputs) {
22+
payload.prompt = params.args.inputs;
23+
delete payload.inputs;
24+
}
25+
return payload;
26+
}
27+
28+
override async getResponse(response: unknown): Promise<TextGenerationOutput> {
29+
if (
30+
typeof response === "object" &&
31+
response &&
32+
"choices" in response &&
33+
Array.isArray(response.choices) &&
34+
response.choices.length > 0 &&
35+
"text" in response.choices[0]
36+
) {
37+
return {
38+
generated_text: response.choices[0].text,
39+
};
40+
}
41+
42+
throw new InferenceClientProviderOutputError("Received malformed response from Inceptron text generation API");
43+
}
44+
}

packages/inference/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ export const INFERENCE_PROVIDERS = [
5656
"groq",
5757
"hf-inference",
5858
"hyperbolic",
59+
"inceptron",
5960
"nebius",
6061
"novita",
6162
"nscale",
@@ -93,6 +94,7 @@ export const PROVIDERS_HUB_ORGS: Record<InferenceProvider, string> = {
9394
groq: "groq",
9495
"hf-inference": "hf-inference",
9596
hyperbolic: "Hyperbolic",
97+
inceptron: "inceptron",
9698
nebius: "nebius",
9799
novita: "novita",
98100
nscale: "nscale",

packages/inference/test/InferenceClient.spec.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,67 @@ describe.skip("InferenceClient", () => {
14111411
TIMEOUT
14121412
);
14131413

1414+
describe.concurrent(
1415+
"Inceptron",
1416+
() => {
1417+
const client = new InferenceClient(env.HF_INCEPTRON_KEY ?? "dummy");
1418+
1419+
HARDCODED_MODEL_INFERENCE_MAPPING.inceptron = {
1420+
"meta-llama/Llama-3.3-70B-Instruct": {
1421+
provider: "inceptron",
1422+
hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
1423+
providerId: "meta-llama/Llama-3.3-70B-Instruct",
1424+
status: "live",
1425+
task: "conversational",
1426+
},
1427+
};
1428+
1429+
it("chatCompletion", async () => {
1430+
const res = await client.chatCompletion({
1431+
model: "meta-llama/Llama-3.3-70B-Instruct",
1432+
provider: "inceptron",
1433+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1434+
});
1435+
if (res.choices && res.choices.length > 0) {
1436+
const completion = res.choices[0].message?.content;
1437+
expect(completion).toMatch(/(two|2)/i);
1438+
}
1439+
});
1440+
1441+
it("chatCompletion stream", async () => {
1442+
const stream = client.chatCompletionStream({
1443+
model: "meta-llama/Llama-3.3-70B-Instruct",
1444+
provider: "inceptron",
1445+
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
1446+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1447+
let out = "";
1448+
for await (const chunk of stream) {
1449+
if (chunk.choices && chunk.choices.length > 0) {
1450+
out += chunk.choices[0].delta.content;
1451+
}
1452+
}
1453+
expect(out).toMatch(/(two|2)/i);
1454+
});
1455+
1456+
it("textGeneration", async () => {
1457+
const res = await client.textGeneration({
1458+
model: "meta-llama/Llama-3.3-70B-Instruct",
1459+
provider: "inceptron",
1460+
inputs: "Once upon a time,",
1461+
parameters: {
1462+
temperature: 0,
1463+
max_new_tokens: 20,
1464+
},
1465+
});
1466+
1467+
expect(res).toHaveProperty("generated_text");
1468+
expect(typeof res.generated_text).toBe("string");
1469+
expect(res.generated_text.length).toBeGreaterThan(0);
1470+
});
1471+
},
1472+
TIMEOUT
1473+
);
1474+
14141475
describe.concurrent(
14151476
"Nebius",
14161477
() => {

0 commit comments

Comments
 (0)