%pylab inline --no-import-all
import alphalens
import pandas as pd
import numpy as np
import talib
from zipline.api import symbol
import pytz
from pandas import DataFrame
Populating the interactive namespace from numpy and matplotlib
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext zipline
# Create global variables to feed alphalens
#dfPrice=pd.DataFrame()
dfPrice=pd.DataFrame()
seSig=pd.Series()
# Zipline algo
def initialize(context):
    """Zipline setup hook: record lookback/indicator constants and the universe.

    Stores on context: iNDays (history window), iADOFast/iADOSlow (talib
    AD Oscillator periods), and secs (the list of traded assets).
    """
    context.iNDays = 100   # How many days of data we want
    context.iADOFast = 5   # talib AD Osc constant
    context.iADOSlow = 14  # talib AD Osc constant
    # Universe: a mix of utility and information-technology names.
    tickers = ["AAPL",  # Apple
               "ACN", "ADI", "ADP", "ADS", "AEE", "AES", "AMD",
               "APH", "AWK", "CMS", "CNP", "DTE", "DUK", "GOGL", "LNT"]
    context.secs = [symbol(t) for t in tickers]
def handle_data(context, data):
    """Per-bar Zipline hook: capture price history and build the signal.

    Side effects: fills the module-level ``dfPrice`` (one price column per
    symbol, for alphalens' ``prices`` argument) and rebuilds the module-level
    ``seSig`` (Series with a (date, sym) MultiIndex holding the Chaikin A/D
    Oscillator values, for alphalens' ``factor`` argument).
    """
    global dfPrice
    global seSig
    liSeries = []  # Used to collect the per-symbol series as we go
    # Get data
    dfP = data.history(context.secs, 'price', context.iNDays, '1d')
    dfL = data.history(context.secs, 'low', context.iNDays, '1d')
    dfH = data.history(context.secs, 'high', context.iNDays, '1d')
    dfV = data.history(context.secs, 'volume', context.iNDays, '1d')
    # BUG FIX: tz_convert returns a *new* index; the original call discarded
    # the result, making the conversion a silent no-op.
    dfP.index = dfP.index.tz_convert(pytz.utc)
    ixP = dfP.index  # This is the date
    for S in context.secs:
        # Save our price history for alphalens
        dfPrice[S.symbol] = dfP[S]
        # Normalize for talib (division by the mean also yields float64,
        # which talib requires)
        seP = dfP[S] / dfP[S].mean()
        seL = dfL[S] / dfL[S].mean()
        seH = dfH[S] / dfH[S].mean()
        seV = dfV[S] / dfV[S].mean()
        # BUG FIX: talib.ADOSC expects (high, low, close, volume); the
        # original passed (close, low, high, volume), swapping high and close.
        ndADosc = talib.ADOSC(
            seH.values, seL.values, seP.values, seV.values,
            context.iADOFast, context.iADOSlow)
        # alphalens requires that the Series used for the signal have a
        # MultiIndex consisting of date+symbol.  Build a list of symbol
        # names the same length as our price data, pair it with the dates,
        # and create the required MultiIndex.
        liW = [S.symbol] * len(ixP)
        miW = pd.MultiIndex.from_tuples(list(zip(ixP, liW)),
                                        names=['date', 'sym'])
        seW = pd.Series(ndADosc, index=miW)
        # Save it for later
        liSeries.append(seW)
    # Now make the required series
    seSig = pd.concat(liSeries).dropna()
    return
#zipline ingest -b custom-csvdir-bundle
%zipline --bundle=custom-csvdir-bundle --start 2018-9-12 --end 2018-9-12 --data-frequency=daily --capital-base=100000
| algo_volatility | algorithm_period_return | alpha | benchmark_period_return | benchmark_volatility | beta | capital_used | ending_cash | ending_exposure | ending_value | ... | short_exposure | short_value | shorts_count | sortino | starting_cash | starting_exposure | starting_value | trading_days | transactions | treasury_period_return | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2018-09-12 20:00:00+00:00 | None | 0.0 | None | 0.000242 | None | None | 0.0 | 100000.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0 | None | 100000.0 | 0.0 | 0.0 | 1 | [] | 0.0 |
1 rows × 37 columns
# Let's take a look at what was built
print type(dfPrice),"length=",len(dfPrice)
dfPrice.head()
<class 'pandas.core.frame.DataFrame'> length= 100
| AAPL | ACN | ADI | ADP | ADS | AEE | AES | AMD | APH | AWK | CMS | CNP | DTE | DUK | GOGL | LNT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2018-04-23 00:00:00+00:00 | 165.24 | 153.04 | 88.00 | 117.10 | 207.79 | 56.63 | 11.86 | 10.04 | 84.40 | 84.28 | 45.37 | 25.94 | 103.33 | 77.99 | 8.39 | 41.69 |
| 2018-04-24 00:00:00+00:00 | 162.94 | 151.07 | 87.65 | 116.32 | 205.37 | 57.20 | 12.00 | 10.09 | 82.77 | 84.77 | 45.75 | 25.36 | 104.20 | 78.22 | 8.37 | 42.10 |
| 2018-04-25 00:00:00+00:00 | 163.65 | 150.09 | 87.77 | 116.70 | 204.10 | 57.21 | 12.03 | 9.71 | 82.88 | 84.75 | 45.86 | 24.92 | 104.01 | 78.89 | 8.22 | 42.23 |
| 2018-04-26 00:00:00+00:00 | 164.22 | 152.29 | 88.70 | 118.69 | 203.96 | 57.87 | 12.17 | 11.04 | 83.79 | 85.89 | 46.44 | 25.11 | 104.83 | 79.84 | 8.18 | 42.86 |
| 2018-04-27 00:00:00+00:00 | 162.32 | 152.39 | 87.68 | 119.88 | 204.48 | 58.60 | 12.31 | 11.11 | 84.28 | 86.78 | 47.18 | 25.47 | 105.65 | 80.50 | 8.16 | 43.06 |
print type(seSig),"length=",len(seSig)
print seSig.tail()
print seSig.index[0]
seSig.head()
<class 'pandas.core.series.Series'> length= 1392
date sym
2018-09-06 00:00:00+00:00 LNT -0.831877
2018-09-07 00:00:00+00:00 LNT -1.039598
2018-09-10 00:00:00+00:00 LNT -3.068961
2018-09-11 00:00:00+00:00 LNT -4.105084
2018-09-12 00:00:00+00:00 LNT -4.521285
dtype: float64
(Timestamp('2018-05-10 00:00:00+0000', tz='UTC'), u'AAPL')
date sym
2018-05-10 00:00:00+00:00 AAPL 4.751928
2018-05-11 00:00:00+00:00 AAPL 3.707006
2018-05-14 00:00:00+00:00 AAPL 2.938517
2018-05-15 00:00:00+00:00 AAPL 2.363901
2018-05-16 00:00:00+00:00 AAPL 1.608145
dtype: float64
# Sector labels and per-ticker sector codes used by alphalens' groupby
# reporting (0 = utilities, 1 = information technology).
sector_names = {
    0 : "utilities",
    1 : "information_technology"}
# BUG FIX: the universe trades "GOGL" (see initialize and the price table
# above), but this map listed "GOOGL", so that asset never mapped to a
# sector when alphalens groups the factor.
ticker_sector = {"AES" : 0, "LNT" : 0, "AEE" : 0, "AWK" : 0, "CNP" : 0, "CMS" : 0,
                 "DTE" : 0, "DUK" : 0, "ACN" : 1, "AMD" : 1, "ADS" : 1, "GOGL" : 1,
                 "APH" : 1, "ADI" : 1, "AAPL" : 1, "ADP" : 1}
# NOTE(review): the TypeError in the traceback below comes from alphalens
# passing these timestamps to np.busday_count, which requires day-resolution
# datetime64 values ('<M8[us]' cannot be safely cast to '<M8[D]').
# Normalizing both date indexes to midnight UTC before the call avoids the
# unsafe cast — confirm against the installed alphalens version.
dfPrice.index = dfPrice.index.tz_convert('UTC').normalize()
seSig.index = seSig.index.set_levels(
    seSig.index.levels[0].tz_convert('UTC').normalize(), level='date')
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=seSig,
                                                                   prices=dfPrice,
                                                                   quantiles=5,
                                                                   bins=None,
                                                                   groupby=ticker_sector,
                                                                   groupby_labels=sector_names)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-d09aefec9175> in <module>()
4 bins=None,
5 groupby=ticker_sector,
----> 6 groupby_labels=sector_names)
/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in call_w_context(*args, **kwargs)
609 "replaced by 'binning_by_group'",
610 category=DeprecationWarning, stacklevel=3)
--> 611 return func(*args, **kwargs)
612 return call_w_context
613
/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in get_clean_factor_and_forward_returns(factor, prices, groupby, binning_by_group, quantiles, bins, periods, filter_zscore, groupby_labels, max_loss)
762
763 forward_returns = compute_forward_returns(factor, prices, periods,
--> 764 filter_zscore)
765
766 factor_data = get_clean_factor(factor, forward_returns, groupby=groupby,
/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in compute_forward_returns(factor, prices, periods, filter_zscore)
281 start = prices.index[p_idx]
282 end = prices.index[p_idx + period]
--> 283 period_len = diff_custom_calendar_timedeltas(start, end, freq)
284 days_diffs.append(period_len.components.days)
285
/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in diff_custom_calendar_timedeltas(start, end, freq)
920 if weekmask is not None and holidays is not None:
921 # we prefer this method as it is faster
--> 922 actual_days = np.busday_count(start, end, weekmask, holidays)
923 else:
924 # default, it is slow
TypeError: Iterator operand 0 dtype could not be cast from dtype('<M8[us]') to dtype('<M8[D]') according to the rule 'safe'