From 5bfbe6e8a1f384dde8fb66460eb7430c929cc910 Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Tue, 7 Oct 2025 13:06:13 +0100
Subject: [PATCH 1/5] Rename completions request

---
 .../examples/request/CompletionRequestExample1.yaml           | 4 ++--
 .../examples/response/CompletionResponseExample1.yaml         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/specification/inference/completion/examples/request/CompletionRequestExample1.yaml b/specification/inference/completion/examples/request/CompletionRequestExample1.yaml
index ac5dd76499..8d53f21dfa 100644
--- a/specification/inference/completion/examples/request/CompletionRequestExample1.yaml
+++ b/specification/inference/completion/examples/request/CompletionRequestExample1.yaml
@@ -1,6 +1,6 @@
 summary: Completion task
-description: Run `POST _inference/completion/openai_chat_completions` to perform a completion on the example question.
-method_request: 'POST _inference/completion/openai_chat_completions'
+description: Run `POST _inference/completion/openai_completions` to perform a completion on the example question.
+method_request: 'POST _inference/completion/openai_completions'
 # type: "request"
 value: |-
   {
diff --git a/specification/inference/completion/examples/response/CompletionResponseExample1.yaml b/specification/inference/completion/examples/response/CompletionResponseExample1.yaml
index 0f2b454856..2ddadd3d15 100644
--- a/specification/inference/completion/examples/response/CompletionResponseExample1.yaml
+++ b/specification/inference/completion/examples/response/CompletionResponseExample1.yaml
@@ -1,6 +1,6 @@
 summary: Completion task
 description: >
-  A successful response from `POST _inference/completion/openai_chat_completions`.
+  A successful response from `POST _inference/completion/openai_completions`.
 # type: "response"
 # response_code:
 value: |-

From 35d93866a5d9dc6365fd98189c6f42502f47d666 Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Tue, 7 Oct 2025 13:06:30 +0100
Subject: [PATCH 2/5] Rerankers don't have chunking_settings

---
 .../inference/put/examples/request/InferencePutExample1.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/specification/inference/put/examples/request/InferencePutExample1.yaml b/specification/inference/put/examples/request/InferencePutExample1.yaml
index 4b33705804..c83f09194e 100644
--- a/specification/inference/put/examples/request/InferencePutExample1.yaml
+++ b/specification/inference/put/examples/request/InferencePutExample1.yaml
@@ -6,10 +6,5 @@ value: |-
    "service_settings": {
      "model_id": "rerank-english-v3.0",
      "api_key": "{{COHERE_API_KEY}}"
-   },
-   "chunking_settings": {
-     "strategy": "recursive",
-     "max_chunk_size": 200,
-     "separator_group": "markdown"
    }
   }

From 484f472fd16df1c7497c7d481573ee258ba4c69c Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Thu, 9 Oct 2025 10:11:47 +0100
Subject: [PATCH 3/5] use 'on the service'

---
 .../inference/chat_completion_unified/UnifiedRequest.ts         | 2 +-
 .../inference/stream_completion/StreamInferenceRequest.ts       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts
index 6602d9448d..6c79b0a6a2 100644
--- a/specification/inference/chat_completion_unified/UnifiedRequest.ts
+++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts
@@ -22,7 +22,7 @@ import { Id } from '@_types/common'
 import { Duration } from '@_types/Time'
 import { RequestChatCompletion } from '@inference/_types/CommonTypes'
 /**
- * Perform chat completion inference
+ * Perform chat completion inference on the service
  *
  * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.
  * It only works with the `chat_completion` task type for `openai` and `elastic` inference services.
diff --git a/specification/inference/stream_completion/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts
index 0e08af6a6f..c349a6dca0 100644
--- a/specification/inference/stream_completion/StreamInferenceRequest.ts
+++ b/specification/inference/stream_completion/StreamInferenceRequest.ts
@@ -23,7 +23,7 @@ import { Duration } from '@_types/Time'
 import { TaskSettings } from '@inference/_types/Services'
 
 /**
- * Perform streaming inference.
+ * Perform streaming completion inference on the service
  * Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.
  * This API works only with the completion task type.
  *

From d34f9d237e747869eaa84df4569f820eec16e4e7 Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Thu, 9 Oct 2025 10:20:20 +0100
Subject: [PATCH 4/5] rerank doc updates

---
 .../inference/completion/CompletionRequest.ts   |  6 ++++++
 specification/inference/rerank/RerankRequest.ts | 17 +++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts
index 2b05f213e1..6aa592c3b4 100644
--- a/specification/inference/completion/CompletionRequest.ts
+++ b/specification/inference/completion/CompletionRequest.ts
@@ -24,6 +24,12 @@ import { TaskSettings } from '@inference/_types/Services'
 
 /**
  * Perform completion inference on the service
+ * Get responses for completion tasks.
+ * This API works only with the completion task type.
+ *
+ * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+ *
+ * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).
  * @rest_spec_name inference.completion
  * @availability stack since=8.11.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
diff --git a/specification/inference/rerank/RerankRequest.ts b/specification/inference/rerank/RerankRequest.ts
index 04ffbed698..53ac180bad 100644
--- a/specification/inference/rerank/RerankRequest.ts
+++ b/specification/inference/rerank/RerankRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { integer } from '@_types/Numeric'
 import { Duration } from '@_types/Time'
 import { TaskSettings } from '@inference/_types/Services'
 
@@ -56,13 +57,17 @@ export interface Request extends RequestBase {
      */
     query: string
     /**
-     * The text on which you want to perform the inference task.
-     * It can be a single string or an array.
-     *
-     * > info
-     * > Inference endpoints for the `completion` task type currently only support a single string as input.
+     * The documents to rank.
      */
-    input: string | Array<string>
+    input: Array<string>
+    /**
+     * Include the document text in the response.
+     */
+    return_documents?: boolean
+    /**
+     * Limit the response to the top N documents.
+     */
+    top_n?: integer
     /**
      * Task settings for the individual inference request.
      * These settings are specific to the task type you specified and override the task settings specified when initializing the service.

From 977228719c2e42a7701212f03162a911afb2c9d0 Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Thu, 9 Oct 2025 10:20:29 +0100
Subject: [PATCH 5/5] autogen

---
 output/openapi/elasticsearch-openapi.json     | 38 +++++-----
 .../elasticsearch-serverless-openapi.json     | 36 ++++-----
 output/schema/schema.json                     | 76 +++++++++++--------
 output/typescript/types.ts                    |  4 +-
 4 files changed, 86 insertions(+), 68 deletions(-)

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index ff5cbaf31d..1686bf6e72 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -20430,7 +20430,7 @@
         "tags": [
           "inference"
         ],
-        "summary": "Perform chat completion inference\n",
+        "summary": "Perform chat completion inference on the service\n",
         "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
         "operationId": "inference-chat-completion-unified",
         "parameters": [
@@ -20515,7 +20515,8 @@
         "tags": [
           "inference"
         ],
-        "summary": "Perform completion inference on the service",
+        "summary": "Perform completion inference on the service\n",
+        "description": "Get responses for completion tasks.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).",
         "operationId": "inference-completion",
         "parameters": [
           {
@@ -20576,7 +20577,7 @@
               "examples": {
                 "CompletionRequestExample1": {
                   "summary": "Completion task",
-                  "description": "Run `POST _inference/completion/openai_chat_completions` to perform a completion on the example question.",
+                  "description": "Run `POST _inference/completion/openai_completions` to perform a completion on the example question.",
                   "value": "{\n  \"input\": \"What is Elastic?\"\n}"
                 }
               }
@@ -20594,7 +20595,7 @@
                 "examples": {
                   "CompletionResponseExample1": {
                     "summary": "Completion task",
-                    "description": "A successful response from `POST _inference/completion/openai_chat_completions`.\n",
+                    "description": "A successful response from `POST _inference/completion/openai_completions`.\n",
                     "value": "{\n  \"completion\": [\n    {\n      \"result\": \"Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management.\"\n    }\n  ]\n}"
                   }
                 }
@@ -23627,18 +23628,19 @@
                     "type": "string"
                   },
                   "input": {
-                    "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
-                    "oneOf": [
-                      {
-                        "type": "string"
-                      },
-                      {
-                        "type": "array",
-                        "items": {
-                          "type": "string"
-                        }
-                      }
-                    ]
+                    "description": "The documents to rank.",
+                    "type": "array",
+                    "items": {
+                      "type": "string"
+                    }
+                  },
+                  "return_documents": {
+                    "description": "Include the document text in the response.",
+                    "type": "boolean"
+                  },
+                  "top_n": {
+                    "description": "Limit the response to the top N documents.",
+                    "type": "number"
                   },
                   "task_settings": {
                     "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.",
@@ -23818,7 +23820,7 @@
         "tags": [
           "inference"
         ],
-        "summary": "Perform streaming inference",
+        "summary": "Perform streaming completion inference on the service\n",
         "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.\n\n## Required authorization\n\n* Cluster privileges: `monitor_inference`\n",
         "operationId": "inference-stream-completion",
         "parameters": [
@@ -147449,7 +147451,7 @@
             "examples": {
               "InferencePutExample1": {
                 "description": "An example body for a `PUT _inference/rerank/my-rerank-model` request.",
-                "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n },\n \"chunking_settings\": {\n   \"strategy\": \"recursive\",\n   \"max_chunk_size\": 200,\n   \"separator_group\": \"markdown\"\n }\n}"
+                "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n }\n}"
               }
             }
           }
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 62ef662a4f..cfb91ff62c 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -11414,7 +11414,7 @@
         "tags": [
           "inference"
         ],
-        "summary": "Perform chat completion inference\n",
+        "summary": "Perform chat completion inference on the service\n",
         "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
         "operationId": "inference-chat-completion-unified",
         "parameters": [
@@ -11499,7 +11499,8 @@
         "tags": [
           "inference"
         ],
-        "summary": "Perform completion inference on the service",
+        "summary": "Perform completion inference on the service\n",
+        "description": "Get responses for completion tasks.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).",
         "operationId": "inference-completion",
         "parameters": [
           {
@@ -11560,7 +11561,7 @@
               "examples": {
                 "CompletionRequestExample1": {
                   "summary": "Completion task",
-                  "description": "Run `POST _inference/completion/openai_chat_completions` to perform a completion on the example question.",
+                  "description": "Run `POST _inference/completion/openai_completions` to perform a completion on the example question.",
                   "value": "{\n  \"input\": \"What is Elastic?\"\n}"
                 }
               }
@@ -11578,7 +11579,7 @@
                 "examples": {
                   "CompletionResponseExample1": {
                     "summary": "Completion task",
-                    "description": "A successful response from `POST _inference/completion/openai_chat_completions`.\n",
+                    "description": "A successful response from `POST _inference/completion/openai_completions`.\n",
                     "value": "{\n  \"completion\": [\n    {\n      \"result\": \"Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management.\"\n    }\n  ]\n}"
                   }
                 }
@@ -14611,18 +14612,19 @@
                     "type": "string"
                   },
                   "input": {
-                    "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
-                    "oneOf": [
-                      {
-                        "type": "string"
-                      },
-                      {
-                        "type": "array",
-                        "items": {
-                          "type": "string"
-                        }
-                      }
-                    ]
+                    "description": "The documents to rank.",
+                    "type": "array",
+                    "items": {
+                      "type": "string"
+                    }
+                  },
+                  "return_documents": {
+                    "description": "Include the document text in the response.",
+                    "type": "boolean"
+                  },
+                  "top_n": {
+                    "description": "Limit the response to the top N documents.",
+                    "type": "number"
                   },
                   "task_settings": {
                     "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.",
@@ -89619,7 +89621,7 @@
             "examples": {
               "InferencePutExample1": {
                 "description": "An example body for a `PUT _inference/rerank/my-rerank-model` request.",
-                "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n },\n \"chunking_settings\": {\n   \"strategy\": \"recursive\",\n   \"max_chunk_size\": 200,\n   \"separator_group\": \"markdown\"\n }\n}"
+                "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n }\n}"
               }
             }
           }
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 42aa410e93..88666ef1b2 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9708,7 +9708,7 @@
           "visibility": "public"
         }
       },
-      "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
+      "description": "Perform chat completion inference on the service\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
       "docId": "inference-api-chat-completion",
       "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference",
       "name": "inference.chat_completion_unified",
@@ -9748,7 +9748,7 @@
           "visibility": "public"
         }
       },
-      "description": "Perform completion inference on the service",
+      "description": "Perform completion inference on the service\nGet responses for completion tasks.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).",
       "docId": "inference-api-post",
       "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference",
       "extPreviousVersionDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html",
@@ -11081,7 +11081,7 @@
           "visibility": "public"
         }
       },
-      "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.",
+      "description": "Perform streaming completion inference on the service\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.",
       "docId": "inference-api-stream",
       "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference",
       "extPreviousVersionDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/8.18/stream-inference-api.html",
@@ -175364,7 +175364,7 @@
           }
         }
       },
-      "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
+      "description": "Perform chat completion inference on the service\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.",
       "examples": {
         "PostChatCompletionRequestExample1": {
           "alternatives": [
@@ -175566,7 +175566,7 @@
           }
         ]
       },
-      "description": "Perform completion inference on the service",
+      "description": "Perform completion inference on the service\nGet responses for completion tasks.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).",
       "examples": {
         "CompletionRequestExample1": {
           "alternatives": [
@@ -175591,8 +175591,8 @@
               "language": "curl"
             }
           ],
-          "description": "Run `POST _inference/completion/openai_chat_completions` to perform a completion on the example question.",
-          "method_request": "POST _inference/completion/openai_chat_completions",
+          "description": "Run `POST _inference/completion/openai_completions` to perform a completion on the example question.",
+          "method_request": "POST _inference/completion/openai_completions",
           "summary": "Completion task",
           "value": "{\n  \"input\": \"What is Elastic?\"\n}"
         }
@@ -175636,7 +175636,7 @@
           }
         }
       ],
-      "specLocation": "inference/completion/CompletionRequest.ts#L25-L63"
+      "specLocation": "inference/completion/CompletionRequest.ts#L25-L69"
     },
     {
       "kind": "response",
@@ -175653,7 +175653,7 @@
       },
       "examples": {
         "CompletionResponseExample1": {
-          "description": "A successful response from `POST _inference/completion/openai_chat_completions`.\n",
+          "description": "A successful response from `POST _inference/completion/openai_completions`.\n",
           "summary": "Completion task",
           "value": "{\n  \"completion\": [\n    {\n      \"result\": \"Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management.\"\n    }\n  ]\n}"
         }
@@ -176076,7 +176076,7 @@
           ],
           "description": "An example body for a `PUT _inference/rerank/my-rerank-model` request.",
           "method_request": "PUT _inference/rerank/my-rerank-model",
-          "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n },\n \"chunking_settings\": {\n   \"strategy\": \"recursive\",\n   \"max_chunk_size\": 200,\n   \"separator_group\": \"markdown\"\n }\n}"
+          "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n   \"model_id\": \"rerank-english-v3.0\",\n   \"api_key\": \"{{COHERE_API_KEY}}\"\n }\n}"
         }
       },
       "inherits": {
@@ -180456,30 +180456,42 @@
             }
           },
           {
-            "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.",
+            "description": "The documents to rank.",
             "name": "input",
             "required": true,
             "type": {
-              "kind": "union_of",
-              "items": [
-                {
-                  "kind": "instance_of",
-                  "type": {
-                    "name": "string",
-                    "namespace": "_builtins"
-                  }
-                },
-                {
-                  "kind": "array_of",
-                  "value": {
-                    "kind": "instance_of",
-                    "type": {
-                      "name": "string",
-                      "namespace": "_builtins"
-                    }
-                  }
+              "kind": "array_of",
+              "value": {
+                "kind": "instance_of",
+                "type": {
+                  "name": "string",
+                  "namespace": "_builtins"
                 }
-              ]
+              }
+            }
+          },
+          {
+            "description": "Include the document text in the response.",
+            "name": "return_documents",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "boolean",
+                "namespace": "_builtins"
+              }
+            }
+          },
+          {
+            "description": "Limit the response to the top N documents.",
+            "name": "top_n",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "integer",
+                "namespace": "_types"
+              }
             }
           },
           {
@@ -180622,7 +180634,7 @@
           }
         }
       ],
-      "specLocation": "inference/rerank/RerankRequest.ts#L25-L72"
+      "specLocation": "inference/rerank/RerankRequest.ts#L26-L77"
     },
     {
       "kind": "response",
@@ -180856,7 +180868,7 @@
           }
         ]
       },
-      "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.",
+      "description": "Perform streaming completion inference on the service.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.",
       "examples": {
         "StreamInferenceRequestExample1": {
           "alternatives": [
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 4d84479dd5..aab0077b16 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -14877,7 +14877,9 @@ export interface InferenceRerankRequest extends RequestBase {
   timeout?: Duration
   body?: {
     query: string
-    input: string | string[]
+    input: string[]
+    return_documents?: boolean
+    top_n?: integer
     task_settings?: InferenceTaskSettings
   }
 }