From f9cb8b4db2c8b34c4d1161707f82f63663b583d0 Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Wed, 8 Oct 2025 16:15:38 +0300 Subject: [PATCH 1/6] Add a simple accelerator selection mechanism. Signed-off-by: Zoltan Kis --- index.bs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/index.bs b/index.bs index 13149d43..3a9e9d1a 100644 --- a/index.bs +++ b/index.bs @@ -748,7 +748,7 @@ An {{MLContext}} interface represents a global state of neural network execution In a situation when a GPU context executes a graph with a constant or an input in the system memory as an {{ArrayBufferView}}, the input content is automatically uploaded from the system memory to the GPU memory, and downloaded back to the system memory of an {{ArrayBufferView}} output buffer at the end of the graph execution. This data upload and download cycles will only occur whenever the execution device requires the data to be copied out of and back into the system memory, such as in the case of the GPU. It doesn't occur when the device is a CPU device. Additionally, the result of the graph execution is in a known layout format. While the execution may be optimized for a native memory access pattern in an intermediate result within the graph, the output of the last operation of the graph must convert the content back to a known layout format at the end of the graph in order to maintain the expected behavior from the caller's perspective.
-When an {{MLContext}} is created with {{MLContextOptions}}, the user agent selects and creates the underlying execution device by taking into account these options, currently only the {{MLPowerPreference}} option. +When an {{MLContext}} is created with {{MLContextOptions}}, the user agent selects and creates the underlying execution device by taking into account these options. Depending on the underlying platform, the user agent may select different combinations of CPU, NPU and GPU devices.
@@ -978,6 +978,7 @@ enum MLPowerPreference { dictionary MLContextOptions { MLPowerPreference powerPreference = "default"; + boolean accelerated = true; }; [SecureContext, Exposed=(Window, Worker)] @@ -1001,6 +1002,8 @@ The powerPreference opt
Prioritizes power consumption over other considerations such as execution speed.
+The accelerated option indicates the application's preference as related to massively parallel acceleration. When set to `true` (by default), the underlying platform will attempt to use the available massively parallel accelerators, such as GPU or NPU, also depending on the {{MLContextOptions/powerPreference}}. When set to `false`, the application hints to prefer CPU inference. + ### {{ML/createContext()}} ### {#api-ml-createcontext}
@@ -1018,11 +1021,16 @@ The powerPreference opt 1. If |options| is a {{GPUDevice}} object, then: 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/webgpu=]". 1. Set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. + 1. Set |context|.{{MLContext/[[accelerated]]}} to `true`. + 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `false`. 1. Otherwise: 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/default=]". 1. Set |context|.{{MLContext/[[lost]]}} to [=a new promise=] in |realm|. 1. If |options|["{{MLContextOptions/powerPreference}}"] [=map/exists=], then set |context|.{{MLContext/[[powerPreference]]}} to |options|["{{MLContextOptions/powerPreference}}"]. 1. Otherwise, set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. + 1. If |options|["{{MLContextOptions/accelerated}}"] [=map/exists=], then set |context|.{{MLContext/[[accelerated]]}} to |options|["{{MLContextOptions/accelerated}}"]. + 1. Otherwise, set |context|.{{MLContext/[[accelerated]]}} to `true`. + 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `false`. 1. If the user agent cannot support |context|.{{MLContext/[[contextType]]}}, then return failure. 1. Return |context|. @@ -1082,6 +1090,8 @@ interface MLContext { undefined destroy(); + readonly attribute boolean accelerated; + readonly attribute boolean cpuFallbackActive; readonly attribute Promise lost; }; @@ -1095,6 +1105,12 @@ interface MLContext { : \[[powerPreference]] of type {{MLPowerPreference}}. :: The {{MLContext}}'s {{MLPowerPreference}}. + : \[[accelerated]] of type {{boolean}}. + :: + The {{MLContext}}'s processing type (CPU or massively parallel processing). + : \[[cpuFallbackActive]] of type {{boolean}}. + :: + The {{MLContext}}'s status for CPU fallback type (CPU or massively parallel processing). : \[[lost]] of type {{Promise}}<{{MLContextLostInfo}}>. 
:: A {{Promise}} that is resolved when the {{MLContext}}'s underlying execution device is no longer available. @@ -1178,7 +1194,8 @@ Note: `dispatch()` itself provides no signal that graph execution has completed. 1. If [=validating tensors with descriptors=] given |outputs| and |graph|.{{MLGraph/[[outputDescriptors]]}} returns false, then [=exception/throw=] a {{TypeError}}. 1. Enqueue the following steps to |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[timeline]]}}: 1. Run these steps, but [=/abort when=] [=this=] [=MLContext/is lost=]: - 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|. + 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|, as well as |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[powerPreference]]}} and |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}}. + 1. If |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}} is `true` and the underlying platform can only do CPU inference at the moment, then set |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[cpuFallbackActive]]}} to `true`, otherwise set it to `false`. Issue(778): Add a mechanism for reporting errors during graph execution. @@ -1730,7 +1747,7 @@ typedef (bigint or unrestricted double) MLNumber; : \[[operator]] of type [=operator=] :: Reference to {{MLOperand}}'s corresponding [=operator=]. - + : \[[constantTensor]] of type {{MLTensor}} :: The {{MLOperand}}'s tensor (only for constant operands). @@ -2151,7 +2168,7 @@ Build a composed graph up to a given output operand into a computational graph a 1. If |name| is empty, then return [=a new promise=] in |realm| [=rejected=] with a {{TypeError}}. 1. If [=MLGraphBuilder/validating operand=] given [=this=] and |operand| returns false, then return [=a new promise=] in |realm| [=rejected=] with a {{TypeError}}. 1. 
If |operand| is in [=this=]'s [=MLGraphBuilder/graph=]'s [=computational graph/inputs=] or [=computational graph/constants=], then return [=a new promise=] in |realm| [=rejected=] with a {{TypeError}}. - 1. If |operand|.{{MLOperand/[[constantTensor]]}} exists and |operand|.{{MLOperand/[[constantTensor]]}}.{{MLTensor/[[isDestroyed]]}} is true, then return [=a new promise=] in |realm| [=rejected=] with a {{TypeError}}. + 1. If |operand|.{{MLOperand/[[constantTensor]]}} exists and |operand|.{{MLOperand/[[constantTensor]]}}.{{MLTensor/[[isDestroyed]]}} is true, then return [=a new promise=] in |realm| [=rejected=] with a {{TypeError}}. 1. Let |operands| be a new empty [=/set=]. 1. Let |operators| be a new empty [=/set=]. 1. Let |inputs| be a new empty [=/set=]. From e1d90dee9262e6fce2d095f925fbec070831c41e Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Mon, 20 Oct 2025 23:24:47 +0300 Subject: [PATCH 2/6] Add getters for context.accelerated and context.cpuFallbackActive. Add the poll CPU fallback status steps. Invoke it from graph.dispatch(). Signed-off-by: Zoltan Kis --- index.bs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/index.bs b/index.bs index 3a9e9d1a..70e93925 100644 --- a/index.bs +++ b/index.bs @@ -1022,7 +1022,7 @@ The accelerated option 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/webgpu=]". 1. Set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. 1. Set |context|.{{MLContext/[[accelerated]]}} to `true`. - 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `false`. + 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `undefined`. 1. Otherwise: 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/default=]". 1. Set |context|.{{MLContext/[[lost]]}} to [=a new promise=] in |realm|. @@ -1030,7 +1030,7 @@ The accelerated option 1. Otherwise, set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. 1. 
If |options|["{{MLContextOptions/accelerated}}"] [=map/exists=], then set |context|.{{MLContext/[[accelerated]]}} to |options|["{{MLContextOptions/accelerated}}"]. 1. Otherwise, set |context|.{{MLContext/[[accelerated]]}} to `true`. - 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `false`. + 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `undefined`. 1. If the user agent cannot support |context|.{{MLContext/[[contextType]]}}, then return failure. 1. Return |context|. @@ -1130,6 +1130,28 @@ The context type is the type of the execution context that manages th
Context created from WebGPU device.
+
+The accelerated getter steps are to return [=this=].{{MLContext/[[accelerated]]}}. +
+ +
+ The cpuFallbackActive getter steps are: + 1. If [=this=].{{MLContext/[[cpuFallbackActive]]}} is `undefined`, then invoke [=poll CPU fallback status=]. + 1. Return [=this=].{{MLContext/[[cpuFallbackActive]]}}. +
+ +
+ + To poll CPU fallback status, run the following steps. + + 1. If [=this=].{{MLContext/[[accelerated]]}} is `false`, then: + 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. + 1. If the underlying execution device is available, then: + 1. Issue a request to check whether the device executes the workload on CPU. If yes, then set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. + 1. Otherwise, set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `false` and return. + 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `undefined`. +
+
To validate buffer with descriptor given {{AllowSharedBufferSource}} |bufferSource| and {{MLOperandDescriptor}} |descriptor|, run the following steps: @@ -1195,7 +1217,7 @@ Note: `dispatch()` itself provides no signal that graph execution has completed. 1. Enqueue the following steps to |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[timeline]]}}: 1. Run these steps, but [=/abort when=] [=this=] [=MLContext/is lost=]: 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|, as well as |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[powerPreference]]}} and |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}}. - 1. If |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}} is `true` and the underlying platform can only do CPU inference at the moment, then set |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[cpuFallbackActive]]}} to `true`, otherwise set it to `false`. + 1. Run the steps to [=poll CPU fallback status=] for |graph|.{{MLGraph/[[context]]}}. Issue(778): Add a mechanism for reporting errors during graph execution. From 093adaa161e0a46270d141bf97aaab563b659e1d Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Sat, 25 Oct 2025 11:38:33 +0300 Subject: [PATCH 3/6] Update index.bs Co-authored-by: Reilly Grant --- index.bs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.bs b/index.bs index 70e93925..e218180e 100644 --- a/index.bs +++ b/index.bs @@ -1002,7 +1002,7 @@ The powerPreference opt
Prioritizes power consumption over other considerations such as execution speed.
-The accelerated option indicates the application's preference as related to massively parallel acceleration. When set to `true` (by default), the underlying platform will attempt to use the available massively parallel accelerators, such as GPU or NPU, also depending on the {{MLContextOptions/powerPreference}}. When set to `false`, the application hints to prefer CPU inference. +The accelerated option indicates the application's preference regarding massively parallel acceleration. When set to `true` (the default), the underlying platform will attempt to use the available massively parallel accelerators, such as a GPU or NPU, also taking the {{MLContextOptions/powerPreference}} into account. When set to `false`, the application indicates it prefers unaccelerated CPU inference. ### {{ML/createContext()}} ### {#api-ml-createcontext} From 36f6902757d4967b6358ba65dc4d1dfa3ff45973 Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Tue, 28 Oct 2025 11:53:48 +0200 Subject: [PATCH 4/6] Address review comments, remove check from dispatch() steps, modify the steps checking CPU fallback Signed-off-by: Zoltan Kis --- index.bs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/index.bs b/index.bs index e218180e..d3357857 100644 --- a/index.bs +++ b/index.bs @@ -1146,8 +1146,10 @@ The accelerated getter steps are to return [=
1. If [=this=].{{MLContext/[[accelerated]]}} is `false`, then: 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. - 1. If the underlying execution device is available, then: - 1. Issue a request to check whether the device executes the workload on CPU. If yes, then set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. + 1. Issue a request to check whether the underlying platform uses CPU as the main underlying execution device for inference. If yes, then implementations should set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. +
+ Depending on the underlying platform, implementations may be able to detect if using the CPU covers only a small portion of the compute graph in a heterogeneous execution plan involving massively parallel processing as well (in which case it is not a CPU fallback), or is it the main underlying execution device as a result of a fallback scenario. +
1. Otherwise, set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `false` and return. 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `undefined`.
@@ -1217,7 +1219,6 @@ Note: `dispatch()` itself provides no signal that graph execution has completed. 1. Enqueue the following steps to |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[timeline]]}}: 1. Run these steps, but [=/abort when=] [=this=] [=MLContext/is lost=]: 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|, as well as |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[powerPreference]]}} and |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}}. - 1. Run the steps to [=poll CPU fallback status=] for |graph|.{{MLGraph/[[context]]}}. Issue(778): Add a mechanism for reporting errors during graph execution. From cb5213c83b8d5e25fa670a5c0265a75f5729fa3f Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Wed, 29 Oct 2025 21:15:33 +0100 Subject: [PATCH 5/6] Remove cpuFallbackActive for later introduction Signed-off-by: Zoltan Kis --- index.bs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/index.bs b/index.bs index d3357857..c3e582f3 100644 --- a/index.bs +++ b/index.bs @@ -1022,7 +1022,6 @@ The accelerated option 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/webgpu=]". 1. Set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. 1. Set |context|.{{MLContext/[[accelerated]]}} to `true`. - 1. Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `undefined`. 1. Otherwise: 1. Set |context|.{{MLContext/[[contextType]]}} to "[=context type/default=]". 1. Set |context|.{{MLContext/[[lost]]}} to [=a new promise=] in |realm|. @@ -1030,7 +1029,6 @@ The accelerated option 1. Otherwise, set |context|.{{MLContext/[[powerPreference]]}} to {{MLPowerPreference/"default"}}. 1. If |options|["{{MLContextOptions/accelerated}}"] [=map/exists=], then set |context|.{{MLContext/[[accelerated]]}} to |options|["{{MLContextOptions/accelerated}}"]. 1. Otherwise, set |context|.{{MLContext/[[accelerated]]}} to `true`. - 1. 
Set |context|.{{MLContext/[[cpuFallbackActive]]}} to `undefined`. 1. If the user agent cannot support |context|.{{MLContext/[[contextType]]}}, then return failure. 1. Return |context|. @@ -1091,7 +1089,6 @@ interface MLContext { undefined destroy(); readonly attribute boolean accelerated; - readonly attribute boolean cpuFallbackActive; readonly attribute Promise lost; }; @@ -1108,9 +1105,6 @@ interface MLContext { : \[[accelerated]] of type {{boolean}}. :: The {{MLContext}}'s processing type (CPU or massively parallel processing). - : \[[cpuFallbackActive]] of type {{boolean}}. - :: - The {{MLContext}}'s status for CPU fallback type (CPU or massively parallel processing). : \[[lost]] of type {{Promise}}<{{MLContextLostInfo}}>. :: A {{Promise}} that is resolved when the {{MLContext}}'s underlying execution device is no longer available. @@ -1134,26 +1128,6 @@ The context type is the type of the execution context that manages th The accelerated getter steps are to return [=this=].{{MLContext/[[accelerated]]}}.
-
- The cpuFallbackActive getter steps are: - 1. If [=this=].{{MLContext/[[cpuFallbackActive]]}} is `undefined`, then invoke [=poll CPU fallback status=]. - 1. Return [=this=].{{MLContext/[[cpuFallbackActive]]}}. -
- -
- - To poll CPU fallback status, run the following steps. - - 1. If [=this=].{{MLContext/[[accelerated]]}} is `false`, then: - 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. - 1. Issue a request to check whether the underlying platform uses CPU as the main underlying execution device for inference. If yes, then implementations should set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `true` and return. -
- Depending on the underlying platform, implementations may be able to detect if using the CPU covers only a small portion of the compute graph in a heterogeneous execution plan involving massively parallel processing as well (in which case it is not a CPU fallback), or is it the main underlying execution device as a result of a fallback scenario. -
- 1. Otherwise, set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `false` and return. - 1. Set [=this=].{{MLContext/[[cpuFallbackActive]]}} to `undefined`. -
-
To validate buffer with descriptor given {{AllowSharedBufferSource}} |bufferSource| and {{MLOperandDescriptor}} |descriptor|, run the following steps: From a73a443bc7be4ddb6c03f7b5f8dbd0389f75323e Mon Sep 17 00:00:00 2001 From: Zoltan Kis Date: Wed, 29 Oct 2025 21:28:02 +0100 Subject: [PATCH 6/6] Move accelerated as input to the build() steps, remove from dispatch() Signed-off-by: Zoltan Kis --- index.bs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.bs b/index.bs index c3e582f3..465df922 100644 --- a/index.bs +++ b/index.bs @@ -1192,7 +1192,7 @@ Note: `dispatch()` itself provides no signal that graph execution has completed. 1. If [=validating tensors with descriptors=] given |outputs| and |graph|.{{MLGraph/[[outputDescriptors]]}} returns false, then [=exception/throw=] a {{TypeError}}. 1. Enqueue the following steps to |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[timeline]]}}: 1. Run these steps, but [=/abort when=] [=this=] [=MLContext/is lost=]: - 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|, as well as |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[powerPreference]]}} and |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}}. + 1. Issue a compute request to |graph|.{{MLGraph/[[implementation]]}} given |inputs| and |outputs|. Issue(778): Add a mechanism for reporting errors during graph execution. @@ -2189,7 +2189,7 @@ Build a composed graph up to a given output operand into a computational graph a 1. Let |promise| be [=a new promise=] in |realm|. 1. Enqueue the following steps to |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[timeline]]}}: 1. Run these steps, but [=/abort when=] |graph|.{{MLGraph/[[context]]}} [=MLContext/is lost=]: - 1. 
Let |graphImpl| be the result of converting [=this=]'s [=MLGraphBuilder/graph=] with |operands|, |operators|, |inputs|, and |outputs|'s [=map/values=] into an [=implementation-defined=] format which can be interpreted by the underlying platform. + 1. Let |graphImpl| be the result of converting [=this=]'s [=MLGraphBuilder/graph=] with |operands|, |operators|, |inputs|, and |outputs|'s [=map/values=], as well as |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[powerPreference]]}} and |graph|.{{MLGraph/[[context]]}}.{{MLContext/[[accelerated]]}}, into an [=implementation-defined=] format which can be interpreted by the underlying platform. 1. If the previous step failed, then [=queue an ML task=] with |global| to [=reject=] |promise| with an "{{OperationError}}" {{DOMException}}, and abort these steps. 1. Set |graph|.{{MLGraph/[[implementation]]}} to |graphImpl|. 1. [=Queue an ML task=] with |global| to [=resolve=] |promise| with |graph|.