Skip to content

Commit 08163a2

Browse files
AndreiKingsleyJolanrensen
authored and committed
pivot Kdocs fixes
(cherry picked from commit bb0be64) # Conflicts: # core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt # core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt
1 parent 7418b97 commit 08163a2

File tree

2 files changed

+1004
-50
lines changed

2 files changed

+1004
-50
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt

Lines changed: 65 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,16 @@ import kotlin.reflect.KProperty
4141
* defines the group consisting of all rows where the column(s) contain that value combination.
4242
*
4343
* Returns a [GroupBy] — a dataframe-like structure that contains all unique combinations of key values
44-
* along with the corresponding groups of rows (each represented as a [DataFrame]).
44+
* along with the corresponding groups of rows (each represented as a [DataFrame]) as rows.
4545
*
4646
* A [GroupBy] can then be:
4747
* * [transformed][Transformation] into a new [GroupBy];
4848
* * [reduced][Reducing] into a [DataFrame], where each group is collapsed into a single representative row;
4949
* * [aggregated][Aggregation] into a [DataFrame], where each group is transformed into one or more rows of derived values;
50-
* * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations.
50+
* * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations
51+
* and then reduced or aggregated into a [DataFrame].
5152
*
52-
* Grouping keys can also be created inline:
53-
* ```kotlin
54-
* // Create a new column "newName" based on existing "oldName" values
55-
* // and use it as a grouping key:
56-
* df.groupBy { expr("newName") { oldName.drop(5) } }
57-
* ```
53+
* @include [GroupingKeysInline]
5854
*
5955
* Check out [Grammar].
6056
*
@@ -66,6 +62,8 @@ import kotlin.reflect.KProperty
6662
*
6763
* Don't confuse this with [group], which groups columns into a
6864
* [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup].
65+
*
66+
* See also [pivot][DataFrame.pivot] that groups rows of [DataFrame] vertically.
6967
*/
7068
internal interface GroupByDocs {
7169
/**
@@ -160,16 +158,16 @@ internal interface GroupByDocs {
160158
* ### Pivot [GroupBy] into [PivotGroupBy] and reduce / aggregate it
161159
*
162160
* {@include [Indent]}
163-
* `| `__`.`__[**`pivot`**][GroupBy.pivot]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`**
161+
* [GroupBy][GroupBy]`.`[**`pivot`**][GroupBy.pivot]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`**
164162
*
165163
* {@include [Indent]}
166164
* ` \[ `__`.`__[**`default`**][PivotGroupBy.default]**`(`**`defaultValue`**`) `**`]`
167165
*
168166
* {@include [Indent]}
169-
* `| `__`.`__[<pivot_reducer>][PivotGroupByDocs.Reducing]
167+
* __`.`__[<pivot_groupBy_reducer>][PivotGroupByDocs.Reducing]
170168
*
171169
* {@include [Indent]}
172-
* `| `__`.`__[<pivot_aggregator>][PivotGroupByDocs.Aggregation]
170+
* `| `__`.`__[<pivot_groupBy_aggregator>][PivotGroupByDocs.Aggregation]
173171
*
174172
* Check out [PivotGroupBy Grammar][PivotGroupByDocs.Grammar] for more information.
175173
*/
@@ -183,14 +181,37 @@ internal interface GroupByDocs {
183181

184182
/**
185183
* ### [GroupBy] aggregation statistics
186-
* * [count][Grouped.count]
187-
* * [max][Grouped.max]/[maxOf][Grouped.maxOf]/[maxFor][Grouped.maxFor]
188-
* * [min][Grouped.min]/[minOf][Grouped.minOf]/[minFor][Grouped.minFor]
189-
* * [sum][Grouped.sum]/[sumOf][Grouped.sumOf]/[sumFor][Grouped.sumFor]
190-
* * [mean][Grouped.mean]/[meanOf][Grouped.meanOf]/[meanFor][Grouped.meanFor]
191-
* * [std][Grouped.std]/[stdOf][Grouped.stdOf]/[stdFor][Grouped.stdFor]
192-
* * [median][Grouped.median]/[medianOf][Grouped.medianOf]/[medianFor][Grouped.medianFor]
193-
* * [percentile][Grouped.percentile]/[percentileOf][Grouped.percentileOf]/[percentileFor][Grouped.percentileFor]
184+
*
185+
* Provides predefined shortcuts for the most common statistical aggregation operations
186+
* that can be applied to each group within a [GroupBy].
187+
*
188+
* Each function computes a statistic across the rows of a group and returns the result as
189+
* a new column (or several columns) in the resulting [DataFrame].
190+
*
191+
* * [count][Grouped.count] — calculate the number of rows in each group;
192+
* * [max][Grouped.max] / [maxOf][Grouped.maxOf] / [maxFor][Grouped.maxFor] —
193+
* calculate the maximum of all values on the selected columns / by a row expression /
194+
* for each of the selected columns within each group;
195+
* * [min][Grouped.min] / [minOf][Grouped.minOf] / [minFor][Grouped.minFor] —
196+
* calculate the minimum of all values on the selected columns / by a row expression /
197+
* for each of the selected columns within each group;
198+
* * [sum][Grouped.sum] / [sumOf][Grouped.sumOf] / [sumFor][Grouped.sumFor] —
199+
* calculate the sum of all values on the selected columns / by a row expression /
200+
* for each of the selected columns within each group;
201+
* * [mean][Grouped.mean] / [meanOf][Grouped.meanOf] / [meanFor][Grouped.meanFor] —
202+
* calculate the mean (average) of all values on the selected columns / by a row expression /
203+
* for each of the selected columns within each group;
204+
* * [std][Grouped.std] / [stdOf][Grouped.stdOf] / [stdFor][Grouped.stdFor] —
205+
* calculate the standard deviation of all values on the selected columns / by a row expression /
206+
* for each of the selected columns within each group;
207+
* * [median][Grouped.median] / [medianOf][Grouped.medianOf] / [medianFor][Grouped.medianFor] —
208+
* calculate the median of all values on the selected columns / by a row expression /
209+
* for each of the selected columns within each group;
210+
* * [percentile][Grouped.percentile] / [percentileOf][Grouped.percentileOf] / [percentileFor][Grouped.percentileFor] —
211+
* calculate a specified percentile of all values on the selected columns / by a row expression /
212+
* for each of the selected columns within each group.
213+
*
214+
* For more information: {@include [DocumentationUrls.GroupByStatistics]}
194215
*/
195216
interface AggregationStatistics
196217

@@ -235,8 +256,8 @@ internal interface GroupByDocs {
235256
* These functions return a [ReducedGroupBy], which can then be transformed into a new [DataFrame]
236257
* containing the reduced rows (either original or transformed) using one of the following methods:
237258
* * [concat][ReducedGroupBy.concat] — simply concatenates all reduced rows;
238-
* * [values][ReducedGroupBy.values] — creates a [DataFrame] with new rows by transforming each reduced row
239-
* using [ColumnsForAggregateSelectionDsl];
259+
* * [values][ReducedGroupBy.values] — creates a [DataFrame] containing the values
260+
* from the reduced rows in the selected columns;
240261
* * [into][ReducedGroupBy.into] — creates a new column with values computed with [RowExpression] on each row,
241262
* or a new [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]
242263
* containing each group reduced to a single row;
@@ -262,14 +283,16 @@ internal interface GroupByDocs {
262283
* The following aggregation methods are available:
263284
* * [concat][GroupBy.concat] — concatenates all rows from all groups into a single [DataFrame],
264285
* without preserving grouping keys;
286+
* * [toDataFrame][GroupBy.toDataFrame] — returns this [GroupBy] as [DataFrame] with the grouping keys and
287+
* corresponding groups in a [FrameColumn];
265288
* * [concatWithKeys][GroupBy.concatWithKeys] — a variant of [concat][GroupBy.concat] that also includes
266289
* grouping keys that were not present in the original [DataFrame];
267290
* * [into][GroupBy.into] — creates a new column containing a list of values computed with a [RowExpression]
268291
* for each group, or a new [frame column][org.jetbrains.kotlinx.dataframe.columns.FrameColumn]
269292
* containing the groups themselves;
270-
* * [values][ReducedGroupBy.values] — creates a [DataFrame] with new rows produced by transforming
271-
* each group using [ColumnsForAggregateSelectionDsl];
272-
* * [count][Grouped.count] — returns a [DataFrame] containing the grouping key columns and an additional column
293+
* * [values][Grouped.values] — creates a [DataFrame] containing values collected into a single [List]
294+
* from all rows of each group for the selected columns;
295+
* * [count][Grouped.count] — creates a [DataFrame] containing the grouping key columns and an additional column
273296
* with the number of rows in each corresponding group;
274297
* * [aggregate][Grouped.aggregate] — performs a set of custom aggregations using [AggregateDsl],
275298
* allowing you to compute one or more derived values per group;
@@ -295,6 +318,19 @@ internal interface GroupByDocs {
295318
* @include [PivotGroupByDocs.CommonDescription]
296319
*/
297320
interface Pivoting
321+
322+
/**
323+
* Grouping keys can also be created inline
324+
* (e.g. by creating a new column using [expr] or simply renaming the old one
325+
* using [named]):
326+
* ```kotlin
327+
* // Create a new column "newName" based on existing "oldName" values
328+
* // and use it as a grouping key:
329+
* df.groupBy { expr("newName") { oldName.drop(5) } }
330+
* ```
331+
*/
332+
@ExcludeFromSources
333+
interface GroupingKeysInline
298334
}
299335

300336
/** {@set [SelectingColumns.OPERATION] [groupBy][groupBy]} */
@@ -348,19 +384,13 @@ public fun <T> DataFrame<T>.groupBy(vararg cols: AnyColumnReference, moveToTop:
348384
// endregion
349385

350386
/**
351-
* Groups the rows of this [Pivot] into [PivotGroupBy]
387+
* Groups the rows of this [Pivot] groups
352388
* based on the values in one or more specified [key columns][\columns].
353-
*
354-
* Works like regular [DataFrame.groupBy] on pivot groups.
355-
*
356-
* Grouping keys can also be created inline:
357-
* ```kotlin
358-
* // Create a new column "newName" based on existing "oldName" values
359-
* // and use it as a grouping key:
360-
* pivot.groupBy { expr("newName") { oldName.drop(5) } }
361-
* ```
389+
* Returns a [PivotGroupBy].
362390
*
363391
* @include [PivotGroupByDocs.CommonDescription]
392+
*
393+
* @include [GroupByDocs.GroupingKeysInline]
364394
*/
365395
@ExcludeFromSources
366396
private interface GroupByForPivotDocs
@@ -376,17 +406,8 @@ private interface CommonGroupByForPivotDocs
376406

377407
/**
378408
* {@include [CommonGroupByForPivotDocs]}
379-
* @include [SelectingColumns.Dsl]
380-
*
381-
* #### For example:
409+
* @include [SelectingColumns.Dsl.WithExample] {@include [SetGroupByOperationArg] {@set [SelectingColumns.RECEIVER] <code>`pivot`</code>}}
382410
*
383-
* `pivot.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }`
384-
*
385-
* `pivot.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }`
386-
*
387-
* `pivot.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }`
388-
*
389-
* {@include [SetGroupByOperationArg]}
390411
* @param moveToTop Specifies whether nested grouping columns should be moved to the top level
391412
* or kept inside a [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup].
392413
* Defaults to `true`.

0 commit comments

Comments
 (0)