Del via


pivot (GroupedData)

Pivots a column of the current DataFrame and performs the specified aggregation.

Syntax

pivot(pivot_col, values=None)

Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| pivot_col | str | Name of the column to pivot. |
| values | list, optional | List of values that will be translated to columns in the output DataFrame. If not provided, Spark eagerly computes the distinct values in pivot_col to determine the resulting schema. Providing an explicit list avoids this eager computation. |

Returns

GroupedData

Examples

# NOTE: this snippet uses `spark` without creating it; it assumes an active
# SparkSession is already bound to that name -- confirm in your environment.
from pyspark.sql import Row, functions as sf

# Sample data: one earnings record per row. 2012 has two dotNET rows
# (10000 and 5000), which is why the pivoted 2012/dotNET sum below is 15000.
df1 = spark.createDataFrame([
    Row(course="dotNET", year=2012, earnings=10000),
    Row(course="Java", year=2012, earnings=20000),
    Row(course="dotNET", year=2012, earnings=5000),
    Row(course="dotNET", year=2013, earnings=48000),
    Row(course="Java", year=2013, earnings=30000),
])

# Compute the sum of earnings for each year by course with each course as a separate column.
# The explicit values list names the output columns; they appear below in the
# order given (dotNET, Java).
df1.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").sort("year").show()
# +----+------+-----+
# |year|dotNET| Java|
# +----+------+-----+
# |2012| 15000|20000|
# |2013| 48000|30000|
# +----+------+-----+

# Without specifying column values (less efficient): the distinct values of
# `course` have to be computed first to determine the output columns.
# Note that the column order below (Java, dotNET) differs from the explicit
# list used above; the aggregated numbers are the same.
df1.groupBy("year").pivot("course").sum("earnings").sort("year").show()
# +----+-----+------+
# |year| Java|dotNET|
# +----+-----+------+
# |2012|20000| 15000|
# |2013|30000| 48000|
# +----+-----+------+

# Using a nested column as the pivot column.
# Each row wraps the same earnings records as df1 in a nested `sales` Row,
# plus a `training` field that is not used by the query below.
df2 = spark.createDataFrame([
    Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=10000)),
    Row(training="junior", sales=Row(course="Java", year=2012, earnings=20000)),
    Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=5000)),
    Row(training="junior", sales=Row(course="dotNET", year=2013, earnings=48000)),
    Row(training="expert", sales=Row(course="Java", year=2013, earnings=30000)),
])
# Dotted names ("sales.year", "sales.course", "sales.earnings") address fields
# of the nested `sales` Row. The grouping column is labelled `year` in the
# output below, and that is the name passed to sort().
df2.groupBy("sales.year").pivot("sales.course").agg(sf.sum("sales.earnings")).sort("year").show()
# +----+-----+------+
# |year| Java|dotNET|
# +----+-----+------+
# |2012|20000| 15000|
# |2013|30000| 48000|
# +----+-----+------+