Skip to content

Commit a5c3ea5

Browse files
committed
implements inceptron provider
1 parent a757f69 commit a5c3ea5

File tree

7 files changed

+116
-0
lines changed

7 files changed

+116
-0
lines changed

packages/hub/src/types/api/api-collection.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ interface ApiCollectionItemModel extends ApiCollectionItemBase {
5757
| "groq"
5858
| "hf-inference"
5959
| "hyperbolic"
60+
| "inceptron"
6061
| "nebius"
6162
| "novita"
6263
| "nscale"

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ Currently, we support the following providers:
5252
- [Fireworks AI](https://fireworks.ai)
5353
- [HF Inference](https://huggingface.co/docs/inference-providers/providers/hf-inference)
5454
- [Hyperbolic](https://hyperbolic.xyz)
55+
- [Inceptron](https://inceptron.io)
5556
- [Nebius](https://studio.nebius.ai)
5657
- [Novita](https://novita.ai)
5758
- [Nscale](https://nscale.com)
@@ -93,6 +94,7 @@ Only a subset of models are supported when requesting third-party providers. You
9394
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
9495
- [HF Inference supported models](https://huggingface.co/api/partners/hf-inference/models)
9596
- [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
97+
- [Inceptron supported models](https://huggingface.co/api/partners/inceptron/models)
9698
- [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
9799
- [Nscale supported models](https://huggingface.co/api/partners/nscale/models)
98100
- [OVHcloud supported models](https://huggingface.co/api/partners/ovhcloud/models)

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import * as Fireworks from "../providers/fireworks-ai.js";
99
import * as Groq from "../providers/groq.js";
1010
import * as HFInference from "../providers/hf-inference.js";
1111
import * as Hyperbolic from "../providers/hyperbolic.js";
12+
import * as Inceptron from "../providers/inceptron.js";
1213
import * as Nebius from "../providers/nebius.js";
1314
import * as Novita from "../providers/novita.js";
1415
import * as Nscale from "../providers/nscale.js";
@@ -128,6 +129,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
128129
conversational: new Hyperbolic.HyperbolicConversationalTask(),
129130
"text-generation": new Hyperbolic.HyperbolicTextGenerationTask(),
130131
},
132+
inceptron: {
133+
conversational: new Inceptron.InceptronConversationalTask(),
134+
"text-generation": new Inceptron.InceptronTextGenerationTask(),
135+
},
131136
nebius: {
132137
"text-to-image": new Nebius.NebiusTextToImageTask(),
133138
conversational: new Nebius.NebiusConversationalTask(),

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
2929
groq: {},
3030
"hf-inference": {},
3131
hyperbolic: {},
32+
inceptron: {},
3233
nebius: {},
3334
novita: {},
3435
nscale: {},
packages/inference/src/providers/inceptron.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import type { TextGenerationOutput } from "@huggingface/tasks";
2+
import type { BodyParams } from "../types.js";
3+
import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper.js";
4+
import { InferenceClientProviderOutputError } from "../errors.js";
5+
// Base URL of the Inceptron inference API (OpenAI-compatible endpoint,
// judging by the "openrouter" host and the `prompt`/`choices[].text`
// shapes used below).
const INCEPTRON_API_BASE_URL = "https://openrouter.inceptron.io";
8+
/**
 * Conversational (chat-completion) task helper for the Inceptron provider.
 *
 * All request/response handling is inherited from `BaseConversationalTask`;
 * this subclass only binds the provider name and its API base URL.
 */
export class InceptronConversationalTask extends BaseConversationalTask {
	constructor() {
		super("inceptron", INCEPTRON_API_BASE_URL);
	}
}
13+
14+
export class InceptronTextGenerationTask extends BaseTextGenerationTask {
15+
constructor() {
16+
super("inceptron", INCEPTRON_API_BASE_URL);
17+
}
18+
19+
override preparePayload(params: BodyParams): Record<string, unknown> {
20+
const payload = super.preparePayload(params);
21+
if (params.args.inputs) {
22+
payload.prompt = params.args.inputs;
23+
delete payload.inputs;
24+
}
25+
return payload;
26+
}
27+
28+
override async getResponse(response: unknown): Promise<TextGenerationOutput> {
29+
if (
30+
typeof response === "object" &&
31+
response &&
32+
"choices" in response &&
33+
Array.isArray(response.choices) &&
34+
response.choices.length > 0 &&
35+
"text" in response.choices[0]
36+
) {
37+
return {
38+
generated_text: response.choices[0].text,
39+
};
40+
}
41+
42+
throw new InferenceClientProviderOutputError("Received malformed response from Inceptron text generation API");
43+
}
44+
}

packages/inference/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ export const INFERENCE_PROVIDERS = [
5656
"groq",
5757
"hf-inference",
5858
"hyperbolic",
59+
"inceptron",
5960
"nebius",
6061
"novita",
6162
"nscale",
@@ -93,6 +94,7 @@ export const PROVIDERS_HUB_ORGS: Record<InferenceProvider, string> = {
9394
groq: "groq",
9495
"hf-inference": "hf-inference",
9596
hyperbolic: "Hyperbolic",
97+
inceptron: "inceptron",
9698
nebius: "nebius",
9799
novita: "novita",
98100
nscale: "nscale",

packages/inference/test/InferenceClient.spec.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,67 @@ describe.skip("InferenceClient", () => {
14111411
TIMEOUT
14121412
);
14131413

1414+
describe.concurrent(
1415+
"Inceptron",
1416+
() => {
1417+
const client = new InferenceClient(env.HF_INCEPTRON_KEY ?? "dummy");
1418+
1419+
HARDCODED_MODEL_INFERENCE_MAPPING.inceptron = {
1420+
"meta-llama/Llama-3.3-70B-Instruct": {
1421+
provider: "inceptron",
1422+
hfModelId: "meta-llama/Llama-3.3-70B-Instruct",
1423+
providerId: "meta-llama/Llama-3.3-70B-Instruct",
1424+
status: "live",
1425+
task: "conversational",
1426+
},
1427+
};
1428+
1429+
it("chatCompletion", async () => {
1430+
const res = await client.chatCompletion({
1431+
model: "meta-llama/Llama-3.3-70B-Instruct",
1432+
provider: "inceptron",
1433+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1434+
});
1435+
if (res.choices && res.choices.length > 0) {
1436+
const completion = res.choices[0].message?.content;
1437+
expect(completion).toMatch(/(two|2)/i);
1438+
}
1439+
});
1440+
1441+
it("chatCompletion stream", async () => {
1442+
const stream = client.chatCompletionStream({
1443+
model: "meta-llama/Llama-3.3-70B-Instruct",
1444+
provider: "inceptron",
1445+
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
1446+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1447+
let out = "";
1448+
for await (const chunk of stream) {
1449+
if (chunk.choices && chunk.choices.length > 0) {
1450+
out += chunk.choices[0].delta.content;
1451+
}
1452+
}
1453+
expect(out).toMatch(/(two|2)/i);
1454+
});
1455+
1456+
it("textGeneration", async () => {
1457+
const res = await client.textGeneration({
1458+
model: "meta-llama/Llama-3.3-70B-Instruct",
1459+
provider: "inceptron",
1460+
inputs: "Once upon a time,",
1461+
parameters: {
1462+
temperature: 0,
1463+
max_new_tokens: 20,
1464+
},
1465+
});
1466+
1467+
expect(res).toHaveProperty("generated_text");
1468+
expect(typeof res.generated_text).toBe("string");
1469+
expect(res.generated_text.length).toBeGreaterThan(0);
1470+
});
1471+
},
1472+
TIMEOUT
1473+
);
1474+
14141475
describe.concurrent(
14151476
"Nebius",
14161477
() => {

0 commit comments

Comments
 (0)