Compustat CRSP IBES Merge

1,227 views
Skip to first unread message

Bianca Minuth

unread,
Mar 4, 2021, 9:22:10 AM3/4/21
to wrdssas
Hi everyone,

for my project I need to combine the data from all Compustat CRSP and IBES datasets. I tried to follow the Codes on the WRDS Website and created a merged dataset using linking tables. I need monthly or quarterly data. 

I linked compustat - crsp and crsp - ibes, but I don't know how to combine all three datasets. 

I would be greateful for your help. Thanks a lot.

So far I used theis code in SAS: 
(*sorry for the long code)
 
/************************************************************************************
* STEP ONE: Create Linking Table with 8-digit CUSIP;
************************************************************************************/

* Create 8-digit CUSIP using "NAMES" file;
data compcusip (keep = gvkey cusip cusip8 tic);
set comp.names;
cusip8 = substr (cusip,1,8);
run;
 
proc print data=compcusip noobs label;
var gvkey tic cusip cusip8;
run;
 
*Extract CRSP Cusip from "STOCKNAMES" file;
proc sort data=crsp.stocknames (keep=cusip permco permno)out=crspcusip nodupkey;
by cusip;run;
 
 
* Merge Compusat cusip with CRSP cusip and create table "total";
proc sql;
create table total as select
compcusip.*,  crspcusip.*
from compcusip, crspcusip
where compcusip.cusip8 = crspcusip.cusip;
quit;
run; 
 
proc print noobs data = total;
var gvkey tic cusip8 permno;
run;

/************************************************************************************
* STEP TWO: Extract Compusat  data;
************************************************************************************/
 * Selected GVKEYS-- use quotes to be consistent with character variables;
 
 
* Date range-- applied to FYEAR (Fiscal Year);
%let fyear1= 2012; 
%let fyear2= 2019;
 
 
* Make extract from Compustat Quarterly Funda file;
data compx2;
   set comp.fundq ;
   where datadate >= '31dec2011'd;
   if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
   * create begin and end dates for fiscal year;
   format endfyr begfyr date9.;
   endfyr= datadate;
   begfyr= intnx('month',endfyr,-11,'beg');
   *intnx(interval, from, n, 'aligment');
   sxa= sale/at;  * compute sales over assets ratio;
run;
 
proc sort; by gvkey endfyr; run;
 
/****************************************************************************************
* STEP THREE: Link GVKEYS to CRSP Identifiers;
* Use CCMXPF_LNKHIST table to obtain CRSP identifiers for our subset of companies/dates;
*****************************************************************************************/
 
*Merge Compusat set with Linking table;
proc sql;
create table CC_link
as select *
from compx2 as a, total as b
where a.gvkey = b.gvkey;
quit;
 
/************************************************************************************
* STEP FOUR: Option 2: Alternative way of matching CRSP data;
* Match accounting data with fiscal yearends in month 't',
  with CRSP return data from month 't+3' to month 't+14' (12 months);
*************************************************************************************/
proc sql;
        create table comp_CRSP as select*
        from CC_link as a, crsp.msf as b
        where a.permno = b.permno and
        intck('month',a.endfyr,b.date)between 3 and 14;
quit;
 
proc download data=comp_CRSP out=mylocal.ccmfundaex; *download output dataset to local location;
run;

/************************************************************************************
* STEP FIVE: CUSIP Method to Link IBES TICKERS and CRSP PERMNOs;
*************************************************************************************/

* Complete list of the IBES TICKERs for all U.S. companies tracked by IBES
** Generate a complete list of the IBES TICKERs for US companies along with all associated historical CUSIPs
** Only observations with non-missing CUSIP are retained;

proc sort data=IBES.IDSUM out=IBES1
(keep=ticker cusip cname sdates);
where USFIRM=1 and not(missing(cusip));
by ticker cusip sdates;
run;

** rows with duplicate CUSIP information for each IBES TICKER should be deleted
** SDATES variable for the company identifying information tracks start dates
** We keep one record per IBES TICKER – CUSIP combination;

proc sql;
create table IBES2
as select *, min(sdates) as fdate, max(sdates) as ldate
from IBES1
group by ticker, cusip
order by ticker, cusip, sdates;
quit;

data IBES2;
set IBES2;
by ticker cusip;
if last.cusip;
label fdate="First Start date of CUSIP record";
label ldate="Last Start date of CUSIP record";
format fdate ldate date9.;
drop sdates;
run;

** Similarly, we will prepare the CRSP PERMNO – CUSIP set using the STOCKNAMES dataset
** The data needs to be arranged by deleting rows with duplicate CUSIP information for each PERMNO, 
** and constructing an effective date range for each historical CUSIP;

proc sort data=CRSP.STOCKNAMES out=CRSP1 (keep=PERMNO NCUSIP comnam namedt nameenddt);
where not missing(NCUSIP);
by PERMNO NCUSIP namedt;
run;

proc sql;
create table CRSP2
as select PERMNO,NCUSIP,comnam,
min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by PERMNO, NCUSIP
order by PERMNO, NCUSIP, NAMEDT;
quit;

data CRSP2;
set CRSP2;
by permno ncusip;
if last.ncusip;
label namedt="Start date of CUSIP record";
label nameenddt="End date of CUSIP record";
format namedt nameenddt date9.;
run;

* Create Link Table;
proc sql;
create table LINK1
as select *
from IBES2 as a, CRSP2 as b
where a.CUSIP = b.NCUSIP
order by TICKER, PERMNO, ldate;
quit;

/* Finalizing and Saving an IBES-CRSP Link Table*/;

proc sql;
create table ticklinks
as select *
from link2_2
where ticker not in (select ticker from link1_2);
quit;

/* Create final link table and save it in home directory */
libname home '~';
data home.iclink;
set link1_2 ticklinks;
label CNAME = "Company Name in IBES";
label COMNAM = "Company Name in CRSP";
label SCORE = "Link Score: 0(best) - 6";
run;

proc sort data=home.iclink;
by ticker permno;
run;

* Merging IBES and CRSP datasets using ICLINK table;
proc sql;
create table IBES_CRSP as select*
from ibes.statsum_epsus as a,
home.ICLINK as b,
crsp.msf as c
where a.ticker=b.ticker and b.permno=c.permno and 
intnx('month',a.STATPERS,0,'E') = intnx('month',c.date,0,'E');
quit;

/************************************************************************************
* STEP SIX: Link IBES, CRSP and Conpustat;
*************************************************************************************/

* Suggesttion: Merging IBES and CRSP and comp datasets using ICLINK table;
proc sql;
create table comp_CRSP_IBES as select*
from comp_CRSP as a,
ibes.statsum_epsus as b,
home.ICLINK as c,
crsp.msf as d
where b.ticker=c.ticker and c.permno=d.permno and 
intck('month',a.endfyr,d.date)between 3 and 14;
quit;





Reply all
Reply to author
Forward
0 new messages