Create Date Dimension

from pyspark.sql import SparkSession

from pyspark.sql import functions as F

import pandas as pd


spark = SparkSession.builder.getOrCreate()


fin_year_start_month = 7  #financial year starts from July


#create a range of dates

dates = [[int(t.strftime('%Y%m%d')), t.strftime('%Y-%m-%d')] for t in pd.date_range(start_date, end_date, freq='D').to_list()]

df = spark.createDataFrame(dates, schema=['ID', 'Date'])


#calculate the other date dimension attributes

df = (

    df.withColumn('MonthStartDate', F.to_date(F.date_trunc('month', 'Date')))

      .withColumn('MonthEndDate', F.last_day('Date'))

      .withColumn('WeekStartDate', F.to_date(F.date_trunc('week', 'Date')))  #Starts from Sunday

      .withColumn('WeekEndDate', F.date_add('WeekStartDate', 6))

      .withColumn('CalendarYear', F.year('Date'))

      .withColumn('CalendarYearQuarter', F.concat(F.year('Date'), F.lit('-'), F.date_format('Date', 'QQQ')))

      .withColumn('CalendarQuarter', F.date_format('Date', 'QQQ'))

      .withColumn('CalendarQuarterNo', F.quarter('Date'))

      .withColumn('CalendarYearMonth', F.concat(F.year('Date'),F.lit('-'), F.month('Date')))

      .withColumn('CalendarYearMonthNo', F.month('Date'))

      .withColumn('MonthNameFull', F.date_format('Date', 'MMMM'))

      .withColumn('MonthNameShort', F.date_format('Date', 'MMM'))

      .withColumn('CalendarYearWeek', F.concat(F.year('Date'),F.lit('-'), F.weekofyear('Date')))

      .withColumn('CalendarYearWeekNo', F.weekofyear('Date'))

      .withColumn('CalendarMonthDayNo', F.dayofmonth('Date'))

      .withColumn('WeekDayNo', F.dayofweek('Date'))

      .withColumn('DayNameFull', F.date_format('Date', 'EEEE'))

      .withColumn('DayNameShort', F.date_format('Date', 'E'))

      .withColumn('FinancialYear', F.when(F.month('Date') >= fin_year_start_month, F.year('Date')+1).otherwise(F.year('Date')))

      .withColumn('FinancialQuarter', F.date_format(F.add_months('Date', -6), 'QQQ'))

      .withColumn('FinancialQuarterNo', F.quarter(F.add_months('Date', -6)))

      .withColumn('FinancialYearMonthNo', F.month(F.add_months('Date', -6)))

      .withColumn('FinancialYearMonth', F.concat('FinancialYear',F.lit('-'), 'FinancialYearMonthNo'))

      .withColumn('FinancialYearStartDate', F.concat(F.col('FinancialYear')-1,F.lit('-08-01')))

      .withColumn('FinancialYearWeekNo', F.weekofyear(F.add_months('Date', -6)))

      .withColumn('FinancialYearWeek', F.concat('FinancialYear', F.lit('-'), 'FinancialYearWeekNo'))

      .withColumn('FinancialYearQuarter',F.concat('FinancialYear', F.lit('-'), 'FinancialQuarterNo'))

      .withColumn('DaysPerMonth', F.date_format(F.last_day('Date'), 'd'))

      .withColumn('IsWeekDay', F.when((F.col('WeekDayNo') > 1) & (F.col('WeekDayNo') < 7), True).otherwise(False))

      .withColumn('YYYYMMDD', F.col('ID'))

)

df.show(50)