between

Check whether the column value lies between the given lower and upper bounds (both inclusive).

Syntax

between(lowerBound, upperBound)

Parameters

Parameter    Type             Description
lowerBound   value or Column  Lower bound of the range (inclusive)
upperBound   value or Column  Upper bound of the range (inclusive)

Returns

Column of BooleanType indicating whether each value falls within the bounds.

Examples

Using between with integer values:

df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])
df.select(df.name, df.age.between(2, 4)).show()
# +-----+---------------------------+
# | name|((age >= 2) AND (age <= 4))|
# +-----+---------------------------+
# |Alice|                       true|
# |  Bob|                      false|
# +-----+---------------------------+
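
The boolean Column that between returns can be used anywhere a predicate is expected, such as in filter. As the output column name above shows, between(a, b) is shorthand for (col >= a) & (col <= b). A minimal sketch of both forms (not part of the original examples):

df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"])

# Keeps only Alice's row, since 2 lies within [2, 4] and 5 does not.
df.filter(df.age.between(2, 4)).show()

# Equivalent explicit form, matching the column name shown above.
df.filter((df.age >= 2) & (df.age <= 4)).show()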

Using between with string values:

df = spark.createDataFrame([("Alice", "A"), ("Bob", "B")], ["name", "initial"])
df.select(df.name, df.initial.between("A", "B")).show()
# +-----+-----------------------------------+
# | name|((initial >= A) AND (initial <= B))|
# +-----+-----------------------------------+
# |Alice|                               true|
# |  Bob|                               true|
# +-----+-----------------------------------+
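
String values are compared lexicographically, so both initials fall within the closed range ["A", "B"].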

Using between with float values:

df = spark.createDataFrame(
    [(2.5, "Alice"), (5.5, "Bob")], ["height", "name"])
df.select(df.name, df.height.between(2.0, 5.0)).show()
# +-----+-------------------------------------+
# | name|((height >= 2.0) AND (height <= 5.0))|
# +-----+-------------------------------------+
# |Alice|                                 true|
# |  Bob|                                false|
# +-----+-------------------------------------+

Using between with date values:

import pyspark.sql.functions as sf
df = spark.createDataFrame(
    [("Alice", "2023-01-01"), ("Bob", "2023-02-01")], ["name", "date"])
df = df.withColumn("date", sf.to_date(df.date))
df.select(df.name, df.date.between("2023-01-01", "2023-01-15")).show()
# +-----+-----------------------------------------------+
# | name|((date >= 2023-01-01) AND (date <= 2023-01-15))|
# +-----+-----------------------------------------------+
# |Alice|                                           true|
# |  Bob|                                          false|
# +-----+-----------------------------------------------+
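
The bounds need not be strings: Python datetime.date objects are also accepted as literals. A sketch of the same range written with date objects (an alternative form, not from the original page):

import datetime
import pyspark.sql.functions as sf

df = spark.createDataFrame(
    [("Alice", "2023-01-01"), ("Bob", "2023-02-01")], ["name", "date"])
df = df.withColumn("date", sf.to_date(df.date))

# datetime.date literals are converted to DATE values, so this is the
# same predicate as the string-based example above.
df.select(
    df.name,
    df.date.between(datetime.date(2023, 1, 1), datetime.date(2023, 1, 15)),
).show()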

Using between with timestamp values:

import pyspark.sql.functions as sf
df = spark.createDataFrame(
    [("Alice", "2023-01-01 10:00:00"), ("Bob", "2023-02-01 10:00:00")],
    schema=["name", "timestamp"])
df = df.withColumn("timestamp", sf.to_timestamp(df.timestamp))
df.select(df.name, df.timestamp.between("2023-01-01", "2023-02-01")).show()
# +-----+---------------------------------------------------------+
# | name|((timestamp >= 2023-01-01) AND (timestamp <= 2023-02-01))|
# +-----+---------------------------------------------------------+
# |Alice|                                                     true|
# |  Bob|                                                    false|
# +-----+---------------------------------------------------------+
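
Note that the bare date strings in the timestamp example are interpreted as midnight, so Bob's 2023-02-01 10:00:00 falls after the upper bound of 2023-02-01 00:00:00.

As the Parameters table notes, the bounds may themselves be Columns, which makes the range per-row. A sketch with per-row bounds (the low and high column names are invented for illustration, not from the original page):

df = spark.createDataFrame(
    [(3, 1, 5, "Alice"), (7, 4, 6, "Bob")],
    ["value", "low", "high", "name"])

# Each row is checked against its own bounds: Alice's 3 lies within
# [1, 5], while Bob's 7 falls outside [4, 6].
df.select(df.name, df.value.between(df.low, df.high)).show()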