1 change: 1 addition & 0 deletions packages/hub/src/types/api/api-collection.ts

@@ -57,6 +57,7 @@ interface ApiCollectionItemModel extends ApiCollectionItemBase {
 		| "groq"
 		| "hf-inference"
 		| "hyperbolic"
+		| "inceptron"
 		| "nebius"
 		| "novita"
 		| "nscale"
2 changes: 2 additions & 0 deletions packages/inference/README.md

@@ -52,6 +52,7 @@ Currently, we support the following providers:
 - [Fireworks AI](https://fireworks.ai)
 - [HF Inference](https://huggingface.co/docs/inference-providers/providers/hf-inference)
 - [Hyperbolic](https://hyperbolic.xyz)
+- [Inceptron](https://inceptron.io)
 - [Nebius](https://studio.nebius.ai)
 - [Novita](https://novita.ai)
 - [Nscale](https://nscale.com)
@@ -93,6 +94,7 @@ Only a subset of models are supported when requesting third-party providers. You
 - [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
 - [HF Inference supported models](https://huggingface.co/api/partners/hf-inference/models)
 - [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
+- [Inceptron supported models](https://huggingface.co/api/partners/inceptron/models)
 - [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
 - [Nscale supported models](https://huggingface.co/api/partners/nscale/models)
 - [OVHcloud supported models](https://huggingface.co/api/partners/ovhcloud/models)
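For reference, a minimal sketch of what calling the new provider looks like from client code, following the same pattern as the other providers in the list above. The model ID is the one used in this PR's tests; actual availability on Inceptron's catalog is an assumption.

```ts
import { InferenceClient } from "@huggingface/inference";

// Assumes a valid HF token in the environment; routing is selected purely
// by the `provider` field, as with every other provider in the list above.
const client = new InferenceClient(process.env.HF_TOKEN);

const res = await client.chatCompletion({
	model: "meta-llama/Llama-3.3-70B-Instruct", // model ID used in the PR's tests
	provider: "inceptron",
	messages: [{ role: "user", content: "One plus one equals" }],
});
console.log(res.choices[0]?.message?.content);
```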
5 changes: 5 additions & 0 deletions packages/inference/src/lib/getProviderHelper.ts

@@ -9,6 +9,7 @@ import * as Fireworks from "../providers/fireworks-ai.js";
 import * as Groq from "../providers/groq.js";
 import * as HFInference from "../providers/hf-inference.js";
 import * as Hyperbolic from "../providers/hyperbolic.js";
+import * as Inceptron from "../providers/inceptron.js";
 import * as Nebius from "../providers/nebius.js";
 import * as Novita from "../providers/novita.js";
 import * as Nscale from "../providers/nscale.js";
@@ -128,6 +129,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
 		conversational: new Hyperbolic.HyperbolicConversationalTask(),
 		"text-generation": new Hyperbolic.HyperbolicTextGenerationTask(),
 	},
+	inceptron: {
+		conversational: new Inceptron.InceptronConversationalTask(),
+		"text-generation": new Inceptron.InceptronTextGenerationTask(),
+	},
 	nebius: {
 		"text-to-image": new Nebius.NebiusTextToImageTask(),
 		conversational: new Nebius.NebiusConversationalTask(),
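Illustrative only: a simplified, standalone version of the registry lookup that `getProviderHelper` performs over the `PROVIDERS` map. The names mirror the diff; the error handling here is a placeholder for the library's dedicated error types, not its actual implementation.

```ts
// Simplified sketch of the provider/task lookup (illustrative only).
type Task = "conversational" | "text-generation" | "text-to-image";

function resolveHelper<T>(
	providers: Record<string, Partial<Record<Task, T>>>,
	provider: string,
	task: Task
): T {
	const helper = providers[provider]?.[task];
	if (!helper) {
		// The real code throws dedicated InferenceClient error types here.
		throw new Error(`Provider ${provider} does not support task ${task}`);
	}
	return helper;
}

// e.g. resolveHelper(PROVIDERS, "inceptron", "conversational")
//      -> the InceptronConversationalTask instance registered above
```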
1 change: 1 addition & 0 deletions packages/inference/src/providers/consts.ts

@@ -29,6 +29,7 @@ export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
 	groq: {},
 	"hf-inference": {},
 	hyperbolic: {},
+	inceptron: {},
 	nebius: {},
 	novita: {},
 	nscale: {},
44 changes: 44 additions & 0 deletions packages/inference/src/providers/inceptron.ts

@@ -0,0 +1,44 @@
+import type { TextGenerationOutput } from "@huggingface/tasks";
+import type { BodyParams } from "../types.js";
+import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js";
+import { InferenceClientProviderOutputError } from "../errors.js";
+
+const INCEPTRON_API_BASE_URL = "https://openrouter.inceptron.io";
+
+export class InceptronConversationalTask extends BaseConversationalTask {
+	constructor() {
+		super("inceptron", INCEPTRON_API_BASE_URL);
+	}
+}
+
+export class InceptronTextGenerationTask extends BaseTextGenerationTask {
+	constructor() {
+		super("inceptron", INCEPTRON_API_BASE_URL);
+	}
+
+	override preparePayload(params: BodyParams): Record<string, unknown> {
+		const payload = super.preparePayload(params);
+		if (params.args.inputs) {
+			payload.prompt = params.args.inputs;
+			delete payload.inputs;
+		}
+		return payload;
+	}
+
+	override async getResponse(response: unknown): Promise<TextGenerationOutput> {
+		if (
+			typeof response === "object" &&
+			response &&
+			"choices" in response &&
+			Array.isArray(response.choices) &&
+			response.choices.length > 0 &&
+			"text" in response.choices[0]
+		) {
+			return {
+				generated_text: response.choices[0].text,
+			};
+		}
+
+		throw new InferenceClientProviderOutputError("Received malformed response from Inceptron text generation API");
+	}
+}
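To make the payload rewrite and response guard above concrete, here is a standalone restatement under the assumption (implied by the base URL and the `choices[0].text` check) that Inceptron returns an OpenAI-style completions body:

```ts
// Standalone restatement of the guard in getResponse above. Assumption:
// the provider returns an OpenAI-style completions body, i.e. a `choices`
// array whose first element carries a `text` string.
function parseTextGeneration(response: unknown): { generated_text: string } {
	const r = response as { choices?: Array<{ text?: unknown }> };
	if (Array.isArray(r?.choices) && r.choices.length > 0 && typeof r.choices[0].text === "string") {
		return { generated_text: r.choices[0].text };
	}
	// The provider class throws InferenceClientProviderOutputError here.
	throw new Error("Received malformed response from Inceptron text generation API");
}

// parseTextGeneration({ choices: [{ text: " there was a model." }] })
//   -> { generated_text: " there was a model." }
```

Note that `preparePayload` performs the inverse mapping on the request side: the client-level `inputs` field is renamed to the completions-style `prompt` before the body is sent.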
2 changes: 2 additions & 0 deletions packages/inference/src/types.ts

@@ -56,6 +56,7 @@ export const INFERENCE_PROVIDERS = [
 	"groq",
 	"hf-inference",
 	"hyperbolic",
+	"inceptron",
 	"nebius",
 	"novita",
 	"nscale",
@@ -93,6 +94,7 @@ export const PROVIDERS_HUB_ORGS: Record<InferenceProvider, string> = {
 	groq: "groq",
 	"hf-inference": "hf-inference",
 	hyperbolic: "Hyperbolic",
+	inceptron: "inceptron",
 	nebius: "nebius",
 	novita: "novita",
 	nscale: "nscale",
61 changes: 61 additions & 0 deletions packages/inference/test/InferenceClient.spec.ts

@@ -1411,6 +1411,67 @@ describe.skip("InferenceClient", () => {
 		TIMEOUT
 	);
 
+	describe.concurrent(
+		"Inceptron",
+		() => {
+			const client = new InferenceClient(env.HF_INCEPTRON_KEY ?? "dummy");
+
+			HARDCODED_MODEL_INFERENCE_MAPPING.inceptron = {
+				"meta-llama/Llama-3.3-70B-Instruct": {
+					provider: "inceptron",
+					hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
+					providerId: "meta-llama/Llama-3.3-70B-Instruct",
+					status: "live",
+					task: "conversational",
+				},
+			};
+
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				});
+				if (res.choices && res.choices.length > 0) {
+					const completion = res.choices[0].message?.content;
+					expect(completion).toMatch(/(two|2)/i);
+				}
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+				let out = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						out += chunk.choices[0].delta.content;
+					}
+				}
+				expect(out).toMatch(/(two|2)/i);
+			});
+
+			it("textGeneration", async () => {
+				const res = await client.textGeneration({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "inceptron",
+					inputs: "Once upon a time,",
+					parameters: {
+						temperature: 0,
+						max_new_tokens: 20,
+					},
+				});
+
+				expect(res).toHaveProperty("generated_text");
+				expect(typeof res.generated_text).toBe("string");
+				expect(res.generated_text.length).toBeGreaterThan(0);
+			});
+		},
+		TIMEOUT
+	);
+
 	describe.concurrent(
 		"Nebius",
 		() => {