I'm new to Hue and I'm using the PySpark interpreter to do some analysis on a file. I want to show the content of an RDD as a table using %table (I saw this done in a demo showing Hue with PySpark). When I try to do the same thing I get the following error:
from __future__ import print_function
import time
import datetime
# Load the purchase-process CSV through sc.textFile.
# NOTE(review): `sc` is presumably the SparkContext injected by the notebook
# session, which would make this an RDD of text lines — it is not defined in
# this snippet; confirm.
process_logs = sc.textFile('/user/cloudera/purchase_process/PurchaseProcess.csv')
def toTS(x):
    """Convert a timestamp string to seconds since the epoch.

    Args:
        x: Timestamp text in ``%Y/%m/%d %H:%M:%S.%f`` format.

    Returns:
        Seconds since the epoch as a float. The string is interpreted as
        local time (``time.mktime``) and the fractional seconds are kept.

    Raises:
        ValueError: If ``x`` does not match the expected format.
    """
    parsed = datetime.datetime.strptime(x, "%Y/%m/%d %H:%M:%S.%f")
    # timetuple() has no sub-second field, so the microseconds parsed via %f
    # must be added back explicitly or they are silently dropped.
    return time.mktime(parsed.timetuple()) + parsed.microsecond / 1e6
def build_cases_summary(case_id_events_mapping):
    """Summarise one case from its grouped event rows.

    Args:
        case_id_events_mapping: A ``(case_id, events)`` pair, where
            ``events`` is an iterable of indexable rows. ``row[3]`` and
            ``row[4]`` must be timestamp strings accepted by ``toTS``
            (presumably the event start and end times; verify against the
            CSV layout).

    Returns:
        A 5-tuple ``(case_id, first_ts, last_ts, event_count, hours)``:
        ``first_ts`` is ``row[3]`` of the earliest event (ordered by
        ``row[3]``), ``last_ts`` is ``row[4]`` of the latest event, and
        ``hours`` is the time between the two expressed in hours.

    Raises:
        IndexError: If ``events`` is empty.
    """
    case_id = case_id_events_mapping[0]
    events = case_id_events_mapping[1]
    # sorted() accepts any iterable, so no intermediate list is needed.
    sorted_events = sorted(events, key=lambda case_logs: toTS(case_logs[3]))
    first_ts = sorted_events[0][3]
    last_ts = sorted_events[-1][4]
    elapsed_hours = (toTS(last_ts) - toTS(first_ts)) / (60 * 60)
    return (case_id, first_ts, last_ts, len(sorted_events), elapsed_hours)
process_logs = process_logs.map(lambda line: line.split(";")).groupBy(lambda row: row[0])
cases_raw_summary = process_logs.map(build_cases_summary)
cases_summary = cases_raw_summary.collect()
nb_events = reduce(lambda x,y: x+y,map(lambda entry : entry[3],cases_summary))
%table cases_summary