[PySpark] UDF에서 2개 이상의 컬럼(multiple columns) 리턴하기
2021.11.04
import pyspark.sql.functions as F from pyspark.sql.functions import udf def fill_empty_data(ds, y, avg_prc): data = {'ds' : ds, 'y' : y, 'avg_prc' : avg_prc} df = pd.DataFrame(data) date_range = pd.date_range(start=START_DATE, end=END_DATE) df['ds'] = pd.to_datetime(df['ds']) df = df.set_index('ds') df = pd.merge(date_range.to_frame(), df, left_index=True, right_index=True, how='left') df['ds'] ..