diff --git a/_field-types/supported-field-types/boolean.md b/_field-types/supported-field-types/boolean.md index b34ac47851..0fd3aa19ca 100644 --- a/_field-types/supported-field-types/boolean.md +++ b/_field-types/supported-field-types/boolean.md @@ -157,3 +157,48 @@ The script returns the value of `a` as `true`, `key` returns the value of `a` as } } ``` + +## Derived source + +When a field contains multiple values, derived source may return them in sorted order. For example: +```json +PUT sample-index1/_doc/1 +{ + "boolean": [false, "true", "false", true, ""] +} +``` +Derived source returns the following: +```json +{ + "boolean": [false, false, false, true, true] +} +``` +If the `null_value` mapping parameter is configured for the field, any ingested `null` value is replaced with the configured `null_value` in derived source. For example: +```json +PUT sample-index2 +{ + "settings": { + "index": { + "derived_source": { + "enabled": true + } + } + }, + "mappings": { + "properties": { + "boolean": {"type": "boolean", "null_value": true} + } + } +} + +PUT sample-index2/_doc/1 +{ + "boolean": [null, true, "false"] +} +``` +Derived source returns the following: +```json +{ + "boolean": [false, true, true] +} +``` diff --git a/_field-types/supported-field-types/date-nanos.md b/_field-types/supported-field-types/date-nanos.md index eb569265fc..9ccb13611f 100644 --- a/_field-types/supported-field-types/date-nanos.md +++ b/_field-types/supported-field-types/date-nanos.md @@ -289,4 +289,39 @@ The response contains only the nanosecond parts of the fields: ] } } -``` \ No newline at end of file +``` + +## Derived source + +When a field contains multiple values, derived source may return them in sorted order. When multiple date formats separated by `||` are configured in the `format` mapping parameter, derived source returns values in the first format listed. 
For example: +```json +PUT sample-index1 +{ + "settings": { + "index": { + "derived_source": { + "enabled": true + } + } + }, + "mappings": { + "properties": { + "date_nanos": { + "type": "date_nanos", + "format": "strict_date_optional_time_nanos||strict_date_optional_time||epoch_millis" + } + } + } +} + +PUT sample-index1/_doc/1 +{ + "date_nanos": [1758504860000, "2025-09-22T00:34", "2025-09-22T01:34:20Z"] +} +``` +Derived source returns the following: +```json +{ + "date_nanos": ["2025-09-22T00:34:00.000000000Z", "2025-09-22T01:34:20.000000000Z", "2025-09-22T01:34:20.000000000Z"] +} +``` diff --git a/_field-types/supported-field-types/date.md b/_field-types/supported-field-types/date.md index 0d5a49c99f..e7ea2af712 100644 --- a/_field-types/supported-field-types/date.md +++ b/_field-types/supported-field-types/date.md @@ -359,3 +359,38 @@ The response contains both documents: } } ``` + +## Derived source + +When a field contains multiple values, derived source may return them in sorted order. When multiple date formats separated by `||` are configured in the `format` mapping parameter, derived source returns values in the first format listed. 
For example: +```json +PUT sample-index1 +{ + "settings": { + "index": { + "derived_source": { + "enabled": true + } + } + }, + "mappings": { + "properties": { + "date": { + "type": "date", + "format": "strict_date_time_no_millis||strict_date_optional_time||epoch_millis" + } + } + } +} + +PUT sample-index1/_doc/1 +{ + "date": [1758504860000, "2025-09-22T00:34", "2025-09-22T01:34:20Z"] +} +``` +Derived source returns the following: +```json +{ + "date": ["2025-09-22T00:34:00Z", "2025-09-22T01:34:20Z", "2025-09-22T01:34:20Z"] +} +``` diff --git a/_field-types/supported-field-types/geo-point.md b/_field-types/supported-field-types/geo-point.md index 96586d044f..8cba328a6b 100644 --- a/_field-types/supported-field-types/geo-point.md +++ b/_field-types/supported-field-types/geo-point.md @@ -112,4 +112,52 @@ Parameter | Description :--- | :--- `ignore_malformed` | A Boolean value that specifies to ignore malformed values and not to throw an exception. Valid values for latitude are [-90, 90]. Valid values for longitude are [-180, 180]. Default is `false`. `ignore_z_value` | Specific to points with three coordinates. If `ignore_z_value` is `true`, the third coordinate is not indexed but is still stored in the _source field. If `ignore_z_value` is `false`, an exception is thrown. -[`null_value`]({{site.url}}{{site.baseurl}}/opensearch/supported-field-types/index#null-value) | A value to be used in place of `null`. Must be of the same type as the field. If this parameter is not specified, the field is treated as missing when its value is `null`. Default is `null`. \ No newline at end of file +[`null_value`]({{site.url}}{{site.baseurl}}/opensearch/supported-field-types/index#null-value) | A value to be used in place of `null`. Must be of the same type as the field. If this parameter is not specified, the field is treated as missing when its value is `null`. Default is `null`. 
+ +## Derived source + +Derived source always returns `geo_point` values as a `lat`/`lon` object, regardless of the format in which they were ingested. For multi-value fields, derived source may sort the points by their latitude/longitude pair, and some precision loss may be observed. For example: +```json +PUT sample-index1/_doc/1 +{ + "geo_point": "txhxegj0uyp3" +} +``` +Or: +```json +PUT sample-index1/_doc/1 +{ + "geo_point": "POINT (74.00 40.71)" +} +``` +Derived source returns the following: +```json +{ + "geo_point": {"lat": 40.71, "lon": 74.00} +} +``` + +```json +PUT sample-index1/_doc/2 +{ + "geo_point": [ + {"lat":75.98, "lon":40.34}, + {"lat":-90, "lon":-80} + ] +} +``` +Derived source returns the following: +```json +{ + "geo_point": [ + { + "lat": -90.0, + "lon": -80.00000000931323 + }, + { + "lat": 75.97999997902662, + "lon": 40.339999962598085 + } + ] +} +``` diff --git a/_field-types/supported-field-types/ip.md b/_field-types/supported-field-types/ip.md index 636e95429c..fa46155414 100644 --- a/_field-types/supported-field-types/ip.md +++ b/_field-types/supported-field-types/ip.md @@ -119,4 +119,18 @@ Parameter | Description [`null_value`]({{site.url}}{{site.baseurl}}/opensearch/supported-field-types/index#null-value) | A value to be used in place of `null`. Must be of the same type as the field. If this parameter is not specified, the field is treated as missing when its value is `null`. Default is `null`. `store` | A Boolean value that specifies whether the field value should be stored and can be retrieved separately from the _source field. Default is `false`. +## Derived source +When a field contains multiple values, derived source may return them in sorted order with duplicates removed. 
For example: +```json +PUT sample-index1/_doc/1 +{ + "ip": ["10.16.0.1", "192.168.0.1", "10.16.0.1", "2001:0db8:85a3:0000:0000:8a2e:0370:7334"] +} +``` +Derived source returns the following: +```json +{ + "ip": ["10.16.0.1", "192.168.0.1", "2001:0db8:85a3:0000:0000:8a2e:0370:7334"] +} +``` diff --git a/_field-types/supported-field-types/keyword.md b/_field-types/supported-field-types/keyword.md index b4ae01a16a..92efcdadf1 100644 --- a/_field-types/supported-field-types/keyword.md +++ b/_field-types/supported-field-types/keyword.md @@ -174,3 +174,48 @@ When you run the same term query on the configured index, the query takes longer } } ``` + +## Derived source + +When a field contains multiple values, derived source may return them in sorted order with duplicates removed. For example: +```json +PUT sample-index1/_doc/1 +{ + "keyword": ["ba", "ab", "ac", "ba"] +} +``` +Derived source returns the following: +```json +{ + "keyword": ["ab", "ac", "ba"] +} +``` +If the `null_value` mapping parameter is configured for the field, any ingested `null` value is replaced with the configured `null_value` in derived source. For example: +```json +PUT sample-index2 +{ + "settings": { + "index": { + "derived_source": { + "enabled": true + } + } + }, + "mappings": { + "properties": { + "keyword": {"type": "keyword", "null_value": "foo"} + } + } +} + +PUT sample-index2/_doc/1 +{ + "keyword": [null, "ba", "ab"] +} +``` +Derived source returns the following: +```json +{ + "keyword": ["ab", "ba", "foo"] +} +``` diff --git a/_field-types/supported-field-types/numeric.md b/_field-types/supported-field-types/numeric.md index 5d902f9660..256badc27d 100644 --- a/_field-types/supported-field-types/numeric.md +++ b/_field-types/supported-field-types/numeric.md @@ -150,3 +150,64 @@ Scaled float has an additional required parameter: `scaling_factor`. Parameter | Description :--- | :--- `scaling_factor` | A double value that is multiplied by the field value and rounded to the nearest long. Required. + +## Derived source + +When a field contains multiple values, derived source may return them in sorted order. 
For example: +```json +PUT sample-index1/_doc/1 +{ + "integer": [1, 0, -1, 0] +} +``` +Derived source returns the following: +```json +{ + "integer": [-1, 0, 0, 1] +} +``` + +For `half_float` fields, derived source returns the value at the precision at which it was stored, so precision loss may be observed. For example: +```json +PUT sample-index2/_doc/1 +{ + "half_float": 1234.56 +} +``` +Derived source returns the following: +```json +{ + "half_float": 1235.0 +} +``` + +For `scaled_float` fields, the value is stored after being multiplied by the `scaling_factor` and rounded, so the value returned by derived source may lose precision. For example: +```json +PUT sample-index3 +{ + "settings": { + "index": { + "derived_source": { + "enabled": true + } + } + }, + "mappings": { + "properties": { + "scaled_float": {"type": "scaled_float", "scaling_factor": 100} + } + } +} + +PUT sample-index3/_doc/1 +{ + "scaled_float": 12.345 +} +``` +Derived source returns the following: +```json +{ + "scaled_float": 12.34 +} +``` + diff --git a/_field-types/supported-field-types/wildcard.md b/_field-types/supported-field-types/wildcard.md index 06dfebdefa..f958abd6e7 100644 --- a/_field-types/supported-field-types/wildcard.md +++ b/_field-types/supported-field-types/wildcard.md @@ -56,3 +56,20 @@ Parameter | Description `ignore_above` | Any string longer than this integer value should not be indexed. Default is `2147483647`. Dynamically updatable. `normalizer` | The normalizer used to preprocess values for indexing and search. By default, no normalization occurs and the original value is used. You may use the `lowercase` normalizer to perform case-insentive matching on the field. `null_value` | A value to be used in place of `null`. Must be of the same type as the field. If this parameter is not specified, then the field is treated as missing when its value is `null`. Default is `null`. + +## Derived source +To use derived source with a `wildcard` field, `doc_values` must be enabled for that field. + +When a field contains multiple values, derived source may return them in sorted order with duplicates removed. 
For example: +```json +PUT sample-index1/_doc/1 +{ + "wildcard": ["ba", "ab", "ac", "ba"] +} +``` +Derived source returns the following: +```json +{ + "wildcard": ["ab", "ac", "ba"] +} +```