for my project I need to combine the data from all Compustat CRSP and IBES datasets. I tried to follow the Codes on the WRDS Website and created a merged dataset using linking tables. I need monthly or quarterly data.
I linked compustat - crsp and crsp - ibes, but I don't know how to combine all three datasets.
I would be greateful for your help. Thanks a lot.
/************************************************************************************
* STEP ONE: Create Linking Table with 8-digit CUSIP;
************************************************************************************/
* Create 8-digit CUSIP using "NAMES" file;
data compcusip (keep = gvkey cusip cusip8 tic);
set comp.names;
cusip8 = substr (cusip,1,8);
run;
proc print data=compcusip noobs label;
var gvkey tic cusip cusip8;
run;
*Extract CRSP Cusip from "STOCKNAMES" file;
proc sort data=crsp.stocknames (keep=cusip permco permno)out=crspcusip nodupkey;
by cusip;run;
* Merge Compusat cusip with CRSP cusip and create table "total";
proc sql;
create table total as select
compcusip.*, crspcusip.*
from compcusip, crspcusip
where compcusip.cusip8 = crspcusip.cusip;
quit;
run;
proc print noobs data = total;
var gvkey tic cusip8 permno;
run;
/************************************************************************************
* STEP TWO: Extract Compusat data;
************************************************************************************/
* Selected GVKEYS-- use quotes to be consistent with character variables;
* Date range-- applied to FYEAR (Fiscal Year);
%let fyear1= 2012;
%let fyear2= 2019;
* Make extract from Compustat Quarterly Funda file;
data compx2;
set comp.fundq ;
where datadate >= '31dec2011'd;
if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
* create begin and end dates for fiscal year;
format endfyr begfyr date9.;
endfyr= datadate;
begfyr= intnx('month',endfyr,-11,'beg');
*intnx(interval, from, n, 'aligment');
sxa= sale/at; * compute sales over assets ratio;
run;
proc sort; by gvkey endfyr; run;
/****************************************************************************************
* STEP THREE: Link GVKEYS to CRSP Identifiers;
* Use CCMXPF_LNKHIST table to obtain CRSP identifiers for our subset of companies/dates;
*****************************************************************************************/
*Merge Compusat set with Linking table;
proc sql;
create table CC_link
as select *
from compx2 as a, total as b
where a.gvkey = b.gvkey;
quit;
/************************************************************************************
* STEP FOUR: Option 2: Alternative way of matching CRSP data;
* Match accounting data with fiscal yearends in month 't',
with CRSP return data from month 't+3' to month 't+14' (12 months);
*************************************************************************************/
proc sql;
create table comp_CRSP as select*
from CC_link as a, crsp.msf as b
where a.permno = b.permno and
intck('month',a.endfyr,b.date)between 3 and 14;
quit;
proc download data=comp_CRSP out=mylocal.ccmfundaex; *download output dataset to local location;
run;
/************************************************************************************
* STEP FIVE: CUSIP Method to Link IBES TICKERS and CRSP PERMNOs;
*************************************************************************************/
* Complete list of the IBES TICKERs for all U.S. companies tracked by IBES
** Generate a complete list of the IBES TICKERs for US companies along with all associated historical CUSIPs
** Only observations with non-missing CUSIP are retained;
proc sort data=IBES.IDSUM out=IBES1
(keep=ticker cusip cname sdates);
where USFIRM=1 and not(missing(cusip));
by ticker cusip sdates;
run;
** rows with duplicate CUSIP information for each IBES TICKER should be deleted
** SDATES variable for the company identifying information tracks start dates
** We keep one record per IBES TICKER – CUSIP combination;
proc sql;
create table IBES2
as select *, min(sdates) as fdate, max(sdates) as ldate
from IBES1
group by ticker, cusip
order by ticker, cusip, sdates;
quit;
data IBES2;
set IBES2;
by ticker cusip;
if last.cusip;
label fdate="First Start date of CUSIP record";
label ldate="Last Start date of CUSIP record";
format fdate ldate date9.;
drop sdates;
run;
** Similarly, we will prepare the CRSP PERMNO – CUSIP set using the STOCKNAMES dataset
** The data needs to be arranged by deleting rows with duplicate CUSIP information for each PERMNO,
** and constructing an effective date range for each historical CUSIP;
proc sort data=CRSP.STOCKNAMES out=CRSP1 (keep=PERMNO NCUSIP comnam namedt nameenddt);
where not missing(NCUSIP);
by PERMNO NCUSIP namedt;
run;
proc sql;
create table CRSP2
as select PERMNO,NCUSIP,comnam,
min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by PERMNO, NCUSIP
order by PERMNO, NCUSIP, NAMEDT;
quit;
data CRSP2;
set CRSP2;
by permno ncusip;
if last.ncusip;
label namedt="Start date of CUSIP record";
label nameenddt="End date of CUSIP record";
format namedt nameenddt date9.;
run;
* Create Link Table;
proc sql;
create table LINK1
as select *
from IBES2 as a, CRSP2 as b
where a.CUSIP = b.NCUSIP
order by TICKER, PERMNO, ldate;
quit;
/* Finalizing and Saving an IBES-CRSP Link Table*/;
proc sql;
create table ticklinks
as select *
from link2_2
where ticker not in (select ticker from link1_2);
quit;
/* Create final link table and save it in home directory */
libname home '~';
data home.iclink;
set link1_2 ticklinks;
label CNAME = "Company Name in IBES";
label COMNAM = "Company Name in CRSP";
label SCORE = "Link Score: 0(best) - 6";
run;
proc sort data=home.iclink;
by ticker permno;
run;
* Merging IBES and CRSP datasets using ICLINK table;
proc sql;
create table IBES_CRSP as select*
from ibes.statsum_epsus as a,
home.ICLINK as b,
crsp.msf as c
where a.ticker=b.ticker and b.permno=c.permno and
intnx('month',a.STATPERS,0,'E') = intnx('month',c.date,0,'E');
quit;