cast from dtype('<M8[us]') to dtype('<M8[D]')

1,852 views
Skip to first unread message

Mosfiqur Rahman

unread,
Sep 19, 2018, 1:24:34 PM9/19/18
to Zipline Python Opensource Backtester
Hi,
I was looking at the code that uses zipline to create the pandas DataFrame and Series objects to pass into Alphalens. But at the end, while using a handy Alphalens data-formatting function to transform my factor and pricing data into the exact inputs expected by the tear sheet functions, I'm getting the following error:

TypeError: Iterator operand 0 dtype could not be cast from dtype('<M8[us]') to dtype('<M8[D]') according to the rule 'safe'

Any idea how I should resolve this issue?

Here's my code:

%pylab inline --no-import-all
import alphalens
import pandas as pd
import numpy as np
import talib
from zipline.api import symbol
import pytz
from pandas import DataFrame
Populating the interactive namespace from numpy and matplotlib
In [2]:
import warnings
warnings.filterwarnings('ignore')
In [3]:
%matplotlib inline
%load_ext zipline
In [4]:
# Create global variables to feed alphalens
#dfPrice=pd.DataFrame()
dfPrice=pd.DataFrame()
seSig=pd.Series()
In [5]:
# Zipline algo

def initialize(context):
    """Set up backtest state: lookback window, talib ADOSC periods, and the universe.

    Called once by zipline before the simulation starts; stores everything
    subsequent ``handle_data`` calls need on ``context``.
    """
    context.iNDays = 100   # How many days of history we want per field

    context.iADOFast = 5   # talib A/D Oscillator fast period
    context.iADOSlow = 14  # talib A/D Oscillator slow period

    # NOTE(review): "GOGL" here vs "GOOGL" in the sector map used later in
    # the notebook -- confirm which ticker is actually intended.
    tickers = [
        "AAPL",  # Apple
        "ACN", "ADI", "ADP", "ADS", "AEE", "AES", "AMD",
        "APH", "AWK", "CMS", "CNP", "DTE", "DUK", "GOGL", "LNT",
    ]
    context.secs = [symbol(t) for t in tickers]

def handle_data(context, data):
    """Build the price history and ADOSC factor series consumed by alphalens.

    Side effects: overwrites the module-level globals ``dfPrice`` (one price
    column per ticker symbol) and ``seSig`` (the factor Series with the
    (date, sym) MultiIndex that alphalens requires).
    """
    global dfPrice
    global seSig
    liSeries = []  # Collects one factor Series per security as we go

    # Daily history windows for every field ADOSC needs.
    dfP = data.history(context.secs, 'price', context.iNDays, '1d')
    dfL = data.history(context.secs, 'low', context.iNDays, '1d')
    dfH = data.history(context.secs, 'high', context.iNDays, '1d')
    dfV = data.history(context.secs, 'volume', context.iNDays, '1d')

    # BUG FIX: tz_convert returns a NEW index -- the original call discarded
    # its result, leaving dfP untouched. Assign it back so dates are UTC.
    dfP.index = dfP.index.tz_convert(pytz.utc)
    ixP = dfP.index  # Date axis shared by all per-symbol factor series

    for S in context.secs:
        # Save raw price history for alphalens (column keyed by ticker).
        dfPrice[S.symbol] = dfP[S]

        # Mean-scale each input series before handing it to talib.
        seP = dfP[S] / dfP[S].mean()
        seL = dfL[S] / dfL[S].mean()
        seH = dfH[S] / dfH[S].mean()
        seV = dfV[S] / dfV[S].mean()

        # Chaikin A/D Oscillator over the normalized inputs.
        ndADosc = talib.ADOSC(
            seP.values, seL.values, seH.values, seV.values,
            context.iADOFast, context.iADOSlow)

        # alphalens requires the factor Series to carry a (date, sym)
        # MultiIndex: pair every date in the window with this symbol.
        miW = pd.MultiIndex.from_tuples(
            zip(ixP, [S.symbol] * len(ixP)), names=['date', 'sym'])
        liSeries.append(pd.Series(ndADosc, index=miW))

    # Stack all symbols into one Series and drop the ADOSC warm-up NaNs.
    seSig = pd.concat(liSeries).dropna()

    return
In [6]:
#zipline ingest -b custom-csvdir-bundle
%zipline --bundle=custom-csvdir-bundle --start 2018-9-12 --end 2018-9-12 --data-frequency=daily --capital-base=100000
Out[6]:
algo_volatilityalgorithm_period_returnalphabenchmark_period_returnbenchmark_volatilitybetacapital_usedending_cashending_exposureending_value...short_exposureshort_valueshorts_countsortinostarting_cashstarting_exposurestarting_valuetrading_daystransactionstreasury_period_return
2018-09-12 20:00:00+00:00None0.0None0.000242NoneNone0.0100000.00.00.0...0.00.00None100000.00.00.01[]0.0

1 rows × 37 columns

In [7]:
# Lets take a look at what got built
print type(dfPrice),"length=",len(dfPrice)
dfPrice.head()
<class 'pandas.core.frame.DataFrame'> length= 100
Out[7]:
AAPLACNADIADPADSAEEAESAMDAPHAWKCMSCNPDTEDUKGOGLLNT
2018-04-23 00:00:00+00:00165.24153.0488.00117.10207.7956.6311.8610.0484.4084.2845.3725.94103.3377.998.3941.69
2018-04-24 00:00:00+00:00162.94151.0787.65116.32205.3757.2012.0010.0982.7784.7745.7525.36104.2078.228.3742.10
2018-04-25 00:00:00+00:00163.65150.0987.77116.70204.1057.2112.039.7182.8884.7545.8624.92104.0178.898.2242.23
2018-04-26 00:00:00+00:00164.22152.2988.70118.69203.9657.8712.1711.0483.7985.8946.4425.11104.8379.848.1842.86
2018-04-27 00:00:00+00:00162.32152.3987.68119.88204.4858.6012.3111.1184.2886.7847.1825.47105.6580.508.1643.06
In [8]:
print type(seSig),"length=",len(seSig)
print seSig.tail()
print seSig.index[0]
seSig.head()
<class 'pandas.core.series.Series'> length= 1392
date                       sym
2018-09-06 00:00:00+00:00  LNT   -0.831877
2018-09-07 00:00:00+00:00  LNT   -1.039598
2018-09-10 00:00:00+00:00  LNT   -3.068961
2018-09-11 00:00:00+00:00  LNT   -4.105084
2018-09-12 00:00:00+00:00  LNT   -4.521285
dtype: float64
(Timestamp('2018-05-10 00:00:00+0000', tz='UTC'), u'AAPL')
Out[8]:
date                       sym 
2018-05-10 00:00:00+00:00  AAPL    4.751928
2018-05-11 00:00:00+00:00  AAPL    3.707006
2018-05-14 00:00:00+00:00  AAPL    2.938517
2018-05-15 00:00:00+00:00  AAPL    2.363901
2018-05-16 00:00:00+00:00  AAPL    1.608145
dtype: float64
In [9]:
sector_names = {
    0 : "utilities",
    1 : "information_technology"}
ticker_sector = {"AES" : 0, "LNT" : 0, "AEE" : 0, "AWK" : 0, "CNP" : 0, "CMS" : 0,
                 "DTE" : 0, "DUK" : 0, "ACN" : 1, "AMD" : 1, "ADS" : 1, "GOOGL" : 1,
                 "APH" : 1, "ADI" : 1, "AAPL" : 1, "ADP" : 1}
In [10]:
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor=seSig, 
                                                                   prices=dfPrice,
                                                                   quantiles=5,
                                                                   bins=None,
                                                                   groupby=ticker_sector,
                                                                   groupby_labels=sector_names)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-10-d09aefec9175> in <module>()
      4                                                                    bins=None,
      5                                                                    groupby=ticker_sector,
----> 6                                                                    groupby_labels=sector_names)

/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in call_w_context(*args, **kwargs)
    609                           "replaced by 'binning_by_group'",
    610                           category=DeprecationWarning, stacklevel=3)
--> 611         return func(*args, **kwargs)
    612     return call_w_context
    613

/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in get_clean_factor_and_forward_returns(factor, prices, groupby, binning_by_group, quantiles, bins, periods, filter_zscore, groupby_labels, max_loss)
    762
    763     forward_returns = compute_forward_returns(factor, prices, periods,
--> 764                                               filter_zscore)
    765
    766     factor_data = get_clean_factor(factor, forward_returns, groupby=groupby,

/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in compute_forward_returns(factor, prices, periods, filter_zscore)
    281             start = prices.index[p_idx]
    282             end = prices.index[p_idx + period]
--> 283             period_len = diff_custom_calendar_timedeltas(start, end, freq)
    284             days_diffs.append(period_len.components.days)
    285

/home/mosfiqur/.conda/envs/env_zipline/lib/python2.7/site-packages/alphalens/utils.pyc in diff_custom_calendar_timedeltas(start, end, freq)
    920     if weekmask is not None and holidays is not None:
    921         # we prefer this method as it is faster
--> 922         actual_days = np.busday_count(start, end, weekmask, holidays)
    923     else:
    924         # default, it is slow

TypeError: Iterator operand 0 dtype could not be cast from dtype('<M8[us]') to dtype('<M8[D]') according to the rule 'safe'


Message has been deleted

Mosfiqur Rahman

unread,
Sep 24, 2018, 9:56:17 AM9/24/18
to Zipline Python Opensource Backtester

Can any of you still help me figure this out? Tried a few ways, still no luck.  

Peter Sikuda

unread,
May 7, 2019, 8:31:40 AM5/7/19
to Zipline Python Opensource Backtester
I have the same issue, trying to process own data (price, factors) with proper format. Also tried to change date format with .astype('datetime64[D]').
Nothing seems to help. It looks to me like a pandas/numpy issue.

Any possible solutions or some other ways to use alphalens than from ingested quandl data?

I am working with cryptocurrency data.

Thanks.

Dňa pondelok, 24. septembra 2018 15:56:17 UTC+2 Mosfiqur Rahman napísal(-a):
Reply all
Reply to author
Forward
0 new messages