/* Session setup: clear the log and output windows, set page layout,  */
/* and reset graphics options before anything is produced.            */
dm 'log;clear;output;clear;';
options ps=50 ls=70 pageno=1;
goptions reset=global border ftext=swiss gunit=cm htext=0.4 htitle=0.5;
goptions display noprompt;

*********************************************************************;
**                                                                 **;
** AUTHOR:  Chris Bilder                                           **;
** COURSE:  STAT 4043                                              **;
** DATE:    11-4-00                                                **;
** UPDATE:                                                         **;
** PURPOSE: Illustrate Chapter 9 material using the NBA guard data **;
** NOTES:                                                          **;
**                                                                 **;
*********************************************************************;

title1 'Chris Bilder, STAT 4043';

*Read in the data;
*Note the use of $ signs. This is used when a character variable is read in;
/* The original comment above ended with a stray single quote.        */
/* Asterisk-style comments must not contain unmatched quotation marks */
/* so the quote was removed to keep the following statements intact.  */
data set1;
    infile 'd:/chris/osu/stat4043/chapter6/nba_data.txt';
    input last_Name $ first_Initial $ Games PPM MPG Height FTP FGP Age;
run;

/* Critical values from the t distribution for the residual checks.   */
/* alpha, n and p are read from the single datalines record below.    */
data t_set;
    input alpha n p;
    *studentized residual critical value;
    t_stud_res1 = tinv(1-alpha/2, n-p);
    *studentized residual critical value with Bon. adj.;
    t_stud_res2 = tinv(1-alpha/(2*n), n-p);
    *studentized deleted residual critical value;
    t_stud_del_res1 = tinv(1-alpha/2, n-p-1);
    *studentized deleted residual critical value with Bon. adj.;
    t_stud_del_res2 = tinv(1-alpha/(2*n), n-p-1);
    datalines;
0.05 105 5
;
run;

title2 'T distribution values';
proc print data=t_set;
run;

*Get critical value for Cook Distance;
/* Here alpha is 0.5, so F is the median of the F(p, n-p) distribution. */
data F_set;
    input alpha n p;
    F = finv(alpha, p, n-p);
    datalines;
0.5 105 5
;
run;

title2 'F distribution values';
proc print data=f_set;
run;

/* Fit PPM on MPG, Height, FGP and Age; request partial plots,        */
/* residual and influence listings, and variance inflation factors.   */
/* The OUTPUT statement saves the per-observation diagnostics to      */
/* out_set1, and ODS OUTPUT saves the OutputStatistics table to resid. */
/* NOTE(review): the hard-coded reference values on the PLOT          */
/* statements presumably come from the t_set and F_set steps above    */
/* and from n=105, p=5 rules of thumb - confirm before changing data. */
/* NOTE(review): several PLOT statements give vref= twice; confirm    */
/* how SAS treats the repeated option.                                */
title2 'PROC REG output';
proc reg data=set1;
    model PPM = MPG Height FGP Age / partial r influence vif;
    id last_name;
    output out=out_set1
           residual=residual
           student=stud_res
           rstudent=s_del_res
           h=h
           DFFITS=dffits
           cookd=cookd
           predicted=predicted;
    symbol1 v=dot h=.1 cv=blue;
    plot residual.*MPG / nostat vref=0 cvref=red;
    plot residual.*Height / nostat vref=0 cvref=red;
    plot residual.*FGP / nostat vref=0 cvref=red;
    plot residual.*Age / nostat vref=0 cvref=red;
    plot rstudent.*obs. / nostat vref=0 cvref=red vref=1.98422 3.61391 -1.98422 -3.61391;
    plot h.*obs. / nostat vref=0 cvref=red vref=0.095 0.2 0.5;
    plot DFFITS.*obs. / nostat vref=0 cvref=red vref=-0.4364 -2 0.4364 2;
    plot COOKD.*obs. / nostat vref=0 cvref=red vref=0.87620;
    ods output OutputStatistics=resid;
run;

/* List observations whose studentized or studentized deleted         */
/* residual falls outside the hard-coded cut-offs.                    */
title2 'Observations that have possible outlying Y observations';
proc print data=out_set1;
    where stud_Res>1.98397 or stud_Res<-1.98397 or
          s_del_res>1.98422 or s_del_res<-1.98422;
    var last_name first_initial residual stud_res s_del_res;
run;

/* List observations whose leverage h exceeds 0.095. */
title2 'Observations that have a large h_ii values';
proc print data=out_set1;
    where h>0.095;
    var last_name first_initial h residual stud_res s_del_res;
run;

/* List observations whose DFFITS exceeds 0.4364 in either direction. */
title2 'Observations that have a large DFFITS values';
proc print data=out_set1;
    where DFFITS>0.4364 or DFFITS<-0.4364;
    var last_name first_initial DFFITS residual;
run;

/* List observations whose Cook distance exceeds 0.87620. */
title2 'Observations that have a large COOK''s Distance values';
proc print data=out_set1;
    where COOKD>0.87620 ;
    var last_name first_initial CookD residual;
run;

*Construct a bubble plot similar to Figure 9.8;
/* Residual against predicted value, bubble size taken from DFFITS. */
proc gplot data=out_set1;
    bubble residual*predicted=DFFITS / vaxis=axis1 haxis=axis2 frame grid
                                       vref=0 cvref=red bcolor=blue;
    title2 "Residual vs. Predicted with Bubble proportional to DFFITS";
    axis1 label = (a=90 'Residual') length=12 order = (-0.35 to 0.35 by 0.05);
    axis2 label=('Predicted') length=12 ;
run;

*Construct a bubble plot similar to Figure 9.8;
/* Same plot, bubble size taken from Cook distance. */
proc gplot data=out_set1;
    bubble residual*predicted=cookd / vaxis=axis1 haxis=axis2 frame grid
                                      vref=0 cvref=red bcolor=blue;
    title2 'Residual vs. Predicted with Bubble proportional to Cook''s Distance';
    axis1 label = (a=90 'Residual') length=12 order = (-0.35 to 0.35 by 0.05);
    axis2 label=('Predicted') length=12 ;
run;

/* List observations from the ODS OutputStatistics table whose        */
/* DFBETAS for any predictor exceeds 0.1952 in either direction.      */
title2 'Observations that have a large DFBETAS values';
proc print data=resid;
    where DFB_MPG>0.1952 or DFB_MPG<-0.1952 or
          DFB_Height>0.1952 or DFB_Height<-0.1952 or
          DFB_FGP>0.1952 or DFB_FGP<-0.1952 or
          DFB_Age>0.1952 or DFB_Age<-0.1952;
    var last_name DFB_MPG DFB_Height DFB_FGP DFB_Age;
run;

/* Regress MPG on the other predictors, as the title says, to show    */
/* how the VIF for MPG is calculated.                                 */
title2 'Show how to calculate VIF for MPG';
proc reg data=set1;
    model MPG = Height FGP Age ;
run;

quit;