diff --git a/packages/hub/src/lib/create-repo.ts b/packages/hub/src/lib/create-repo.ts index c0323dc112..b3a9e7be11 100644 --- a/packages/hub/src/lib/create-repo.ts +++ b/packages/hub/src/lib/create-repo.ts @@ -47,7 +47,7 @@ export async function createRepo( ...(repoId.type === "space" ? { type: "space", - sdk: "static", + sdk: params.sdk ?? "static", } : { type: repoId.type, diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index 5836fb9f73..3e95eceb8c 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -144,6 +144,7 @@ export const PROVIDERS: Record { + return { + input: { + ...omit(params.args, ["inputs", "parameters"]), + ...(params.args.parameters as Record), + audio: params.args.inputs, // This will be processed in preparePayloadAsync + }, + version: params.model.includes(":") ? params.model.split(":")[1] : undefined, + }; + } + + async preparePayloadAsync(args: AutomaticSpeechRecognitionArgs): Promise { + const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : undefined; + + if (!blob || !(blob instanceof Blob)) { + throw new Error("Audio input must be a Blob"); + } + + // Convert Blob to base64 data URL + const bytes = new Uint8Array(await blob.arrayBuffer()); + const base64 = base64FromBytes(bytes); + const audioInput = `data:${blob.type || "audio/wav"};base64,${base64}`; + + return { + ...("data" in args ? omit(args, "data") : omit(args, "inputs")), + inputs: audioInput, + }; + } + + override async getResponse(response: ReplicateOutput): Promise { + if (typeof response?.output === "string") return { text: response.output }; + if (Array.isArray(response?.output) && typeof response.output[0] === "string") return { text: response.output[0] }; + + const out = response?.output as + | undefined + | { + transcription?: string; + translation?: string; + txt_file?: string; + }; + if (out && typeof out === "object") { + if (typeof out.transcription === "string") return { text: out.transcription }; + if (typeof out.translation === "string") return { text: out.translation }; + if (typeof out.txt_file === "string") { + const r = await fetch(out.txt_file); + return { text: await r.text() }; + } + } + throw new InferenceClientProviderOutputError( + "Received malformed response from Replicate automatic-speech-recognition API" + ); + } +} + export class ReplicateImageToImageTask extends ReplicateTask implements ImageToImageTaskHelper { override preparePayload(params: BodyParams): Record { return { diff --git a/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts b/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts index a8ce6ebed6..c5a716d5bc 100644 --- a/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts +++ b/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts @@ -4,7 +4,6 @@ import { getProviderHelper } from "../../lib/getProviderHelper.js"; import type { BaseArgs, Options } from "../../types.js"; import { innerRequest } from "../../utils/request.js"; import type { LegacyAudioInput } from "./utils.js"; -import { InferenceClientProviderOutputError } from "../../errors.js"; export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput); /** @@ -22,9 +21,5 @@ export async function automaticSpeechRecognition( ...options, task: "automatic-speech-recognition", }); - const isValidOutput = typeof res?.text === "string"; - if (!isValidOutput) { - throw new InferenceClientProviderOutputError("Received malformed response from automatic-speech-recognition API"); - } return providerHelper.getResponse(res); } diff --git a/packages/ollama-utils/package.json b/packages/ollama-utils/package.json index df3ccf7cb3..4023fc1cf5 100644 --- a/packages/ollama-utils/package.json +++ b/packages/ollama-utils/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/ollama-utils", "packageManager": "pnpm@10.10.0", - "version": "0.0.12", + "version": "0.0.13", "description": "Various utilities for maintaining Ollama compatibility with models on Hugging Face hub", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { diff --git a/packages/ollama-utils/src/chat-template-automap.ts b/packages/ollama-utils/src/chat-template-automap.ts index a600cf7d24..18add81dda 100644 --- a/packages/ollama-utils/src/chat-template-automap.ts +++ b/packages/ollama-utils/src/chat-template-automap.ts @@ -5,19 +5,19 @@ import type { OllamaChatTemplateMapEntry } from "./types"; /** * Skipped these models due to error: - * - library/minicpm-v:latest - * - library/qwen2:latest - * - library/qwen2.5:0.5b - * - library/llama4:latest - * - library/command-r:latest - * - library/phi4-reasoning:latest + * - library/llama3.2:latest + * - library/llama2:latest + * - library/llama3.1:latest + * - library/deepseek-v3:latest * - library/cogito:3b - * - library/starcoder:latest - * - library/mistral-small3.1:latest - * - library/cogito:latest - * - library/aya-expanse:latest - * - library/smallthinker:3b - * - library/command-r7b:7b + * - library/phi4-mini:latest + * - library/qwen3-coder:latest + * - library/granite3.2-vision:latest + * - library/opencoder:latest + * - library/opencoder:1.5b + * - library/phind-codellama:latest + * - library/yarn-mistral:latest + * - library/stablelm-zephyr:latest */ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [ @@ -503,6 +503,20 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [ }, }, }, + { + model: "library/gemma3:270m", + gguf: "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", + ollama: { + template: + '{{- $systemPromptAdded := false }}\n{{- range $i, $_ := .Messages }}\n{{- $last := eq (len (slice $.Messages $i)) 1 }}\n{{- if eq .Role "user" }}user\n{{- if (and (not $systemPromptAdded) $.System) }}\n{{- $systemPromptAdded = true }}\n{{ $.System }}\n{{ end }}\n{{ .Content }}\n{{ if $last }}model\n{{ end }}\n{{- else if eq .Role "assistant" }}model\n{{ .Content }}{{ if not $last }}\n{{ end }}\n{{- end }}\n{{- end }}', + tokens: ["", "", ""], + params: { + stop: [""], + top_k: 64, + top_p: 0.95, + }, + }, + }, { model: "library/glm4:9b", gguf: "[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{% elif tool['type'] == 'python' %}\n\n## python\n\n当你向 `python` 发送包含 Python 代码的消息时,该代码将会在一个有状态的 Jupyter notebook 环境中执行。\n`python` 返回代码执行的输出,或在执行 60 秒后返回超时。\n`/mnt/data` 将会持久化存储你的文件。在此会话中,`python` 无法访问互联网。不要使用 `python` 进行任何网络请求或者在线 API 调用,这些在线内容的访问将不会成功。{% elif tool['type'] == 'simple_browser' %}\n\n## simple_browser\n\n你可以使用 `simple_browser` 工具。该工具支持以下函数:\n`search(query: str, recency_days: int)`:使用搜索引擎进行查询并显示结果,可以使用 `recency_days` 参数控制搜索内容的时效性。\n`mclick(ids: list[int])`:获取一系列指定 id 的页面内容。每次调用时,须选择3-10个页面。选择多个角度的页面,同时尽可能选择可信任的信息来源。考虑到部分页面是无法加载的,你也可以多打开一些可能有用的页面而不用担心内容过多。\n`open_url(url: str)`:打开指定的 URL。\n\n使用 `【{引用 id}†{引用文本}】` 来引用内容。\n\n操作步骤:1. 使用 `search` 来获得信息列表; 2. 使用 `mclick` 来获取指定 ID 页面的内容; 3. 根据获得的内容进行回复。在回复中应当引用信息来源。\n 如果用户提供了 URL,也可以用 `open_url` 直接打开页面。\n如果初次搜索结果没有找到合适的信息,也可以再次使用 `search` 进行搜索。{% elif tool['type'] == 'cogview' %}\n\n## cogview\n\n如果用户的请求中包含了对图像的描述,你可以使用 `cogview` 来生成图像并展示给用户。你需要向 `cogview` 发送图像描述,规则:\n- 发送给 `cogview` 的消息必须使用英语。用户的中文描述必须完全翻译为英语。\n- 应当尽可能详细地描述图像生成的需求,需求描述约 100 英文单词。\n- 保持用户原始描述的意图。不要虚构内容或者没见过的人物。\n- 如无特殊说明,所在地为中国,持有中国立场并遵循中国社会主义价值观。{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}", diff --git a/packages/tasks/package.json b/packages/tasks/package.json index b227060dc9..619cce971d 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@10.10.0", - "version": "0.19.35", + "version": "0.19.37", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 144c57bdca..7c9076204a 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -315,6 +315,31 @@ const snippetDockerModelRunner = (model: ModelData, filepath?: string): string = return `docker model run hf.co/${model.id}${getQuantTag(filepath)}`; }; +const snippetLemonade = (model: ModelData, filepath?: string): LocalAppSnippet[] => { + const tagName = getQuantTag(filepath); + const modelName = model.id.split("/")[1]; + return [ + { + title: "Pull the model", + setup: "# Download Lemonade from https://lemonade-server.ai/", + content: [ + `lemonade-server pull user.${modelName} \\ + --checkpoint ${model.id}${tagName} \\ + --recipe llamacpp`, + "# Note: If you installed from source, use the lemonade-server-dev command instead.", + ].join("\n"), + }, + { + title: "Run and chat with the model", + content: `lemonade-server run user.${modelName}`, + }, + { + title: "List all available models", + content: "lemonade-server list", + }, + ]; +}; + /** * Add your new local app here. * @@ -492,6 +517,21 @@ export const LOCAL_APPS = { displayOnModelPage: isLlamaCppGgufModel, snippet: snippetDockerModelRunner, }, + lemonade: { + prettyLabel: "Lemonade", + docsUrl: "https://lemonade-server.ai", + mainTask: "text-generation", + displayOnModelPage: isLlamaCppGgufModel, + snippet: snippetLemonade, + }, + aifx: { + prettyLabel: "AIFX", + docsUrl: "https://aifxart.com", + mainTask: "text-to-image", + macOSOnly: false, + displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image", + deeplink: (model) => new URL(`https://aifxart.com/pages/model-view?model=${model.id}`), + } } satisfies Record; export type LocalAppKey = keyof typeof LOCAL_APPS; diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index 15e1774fe7..dd2044ea9d 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -1523,7 +1523,7 @@ export const transformers = (model: ModelData): string[] => { autoSnippet.push( "# Load model directly", `from transformers import ${info.auto_model}`, - `model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ', torch_dtype="auto"),' + `model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ', torch_dtype="auto")' ); } @@ -1703,6 +1703,16 @@ export const vfimamba = (model: ModelData): string[] => [ model = Model.from_pretrained("${model.id}")`, ]; +export const lvface = (model: ModelData): string[] => [ + `from huggingface_hub import hf_hub_download + from inference_onnx import LVFaceONNXInferencer + +model_path = hf_hub_download("${model.id}", "LVFace-L_Glint360K/LVFace-L_Glint360K.onnx") +inferencer = LVFaceONNXInferencer(model_path, use_gpu=True, timeout=300) +img_path = 'path/to/image1.jpg' +embedding = inferencer.infer_from_image(img_path)`, +]; + export const voicecraft = (model: ModelData): string[] => [ `from voicecraft import VoiceCraft diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index d267b199dd..adea9c196c 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -130,6 +130,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { filter: false, countDownloads: `path:"llm_config.json"`, }, + bboxmaskpose: { + prettyLabel: "BBoxMaskPose", + repoName: "BBoxMaskPose", + repoUrl: "https://github.com/MiraPurkrabek/BBoxMaskPose", + filter: false, + countDownloads: `path_extension:"pth"`, + }, ben2: { prettyLabel: "BEN2", repoName: "BEN2", @@ -1156,6 +1163,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { countDownloads: `path_extension:"pkl"`, snippets: snippets.vfimamba, }, + lvface: { + prettyLabel: "LVFace", + repoName: "LVFace", + repoUrl: "https://github.com/bytedance/LVFace", + countDownloads: `path_extension:"pt" OR path_extension:"onnx"`, + snippets: snippets.lvface, + }, voicecraft: { prettyLabel: "VoiceCraft", repoName: "VoiceCraft",