TT 2020

## Count Data: Smoking Example

Mullahy (1997), Review of Economics and Statistics, 79, 596-593

library(wooldridge)
library(lmtest)
library(sandwich)

names(smoke)
##  [1] "educ"     "cigpric"  "white"    "age"      "income"   "cigs"
##  [7] "restaurn" "lincome"  "agesq"    "lcigpric"

## Variable Descriptions: smoke

# Specify x'beta
smoking_model <- cigs ~ lcigpric + lincome + restaurn + white + educ + age + agesq
• cigs number of cigarettes smoked per day
• lcigpric log of state cigarette price (cents/pack)
• lincome log of annual income (US Dollars)
• restaurn equals 1 if restaurant has smoking restrictions
• white equals 1 if white
• educ years of schooling
• age age in years
• agesq age squared

## OLS with plain-vanilla SEs

ols <- lm(smoking_model, data = smoke)
coeftest(ols)
##
## t test of coefficients:
##
##               Estimate Std. Error t value  Pr(>|t|)
## (Intercept) -2.6824347 24.2207299 -0.1107   0.91184
## lcigpric    -0.8509044  5.7823214 -0.1472   0.88305
## lincome      0.8690144  0.7287636  1.1925   0.23344
## restaurn    -2.8656213  1.1174059 -2.5645   0.01051 *
## white       -0.5592363  1.4594610 -0.3832   0.70169
## educ        -0.5017533  0.1671677 -3.0015   0.00277 **
## age          0.7745021  0.1605158  4.8251 1.676e-06 ***
## agesq       -0.0090686  0.0017481 -5.1878 2.699e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## OLS with Robust SEs

coeftest(ols, vcov. = vcovHC)
##
## t test of coefficients:
##
##               Estimate Std. Error t value  Pr(>|t|)
## (Intercept) -2.6824347 26.1562565 -0.1026  0.918343
## lcigpric    -0.8509044  6.1132231 -0.1392  0.889334
## lincome      0.8690144  0.6035374  1.4399  0.150296
## restaurn    -2.8656213  1.0215479 -2.8052  0.005151 **
## white       -0.5592363  1.3943263 -0.4011  0.688468
## educ        -0.5017533  0.1632410 -3.0737  0.002186 **
## age          0.7745021  0.1393883  5.5564 3.752e-08 ***
## agesq       -0.0090686  0.0014754 -6.1464 1.250e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Poisson Regression

pois_reg <- glm(smoking_model, family = poisson(link = 'log'), data = smoke)
coeftest(pois_reg)
##
## z test of coefficients:
##
##                Estimate  Std. Error  z value  Pr(>|z|)
## (Intercept)  0.39644936  0.61394730   0.6457    0.5184
## lcigpric    -0.10596071  0.14339006  -0.7390    0.4599
## lincome      0.10372755  0.02028060   5.1146 3.144e-07 ***
## restaurn    -0.36360594  0.03122216 -11.6458 < 2.2e-16 ***
## white       -0.05520115  0.03741971  -1.4752    0.1402
## educ        -0.05942253  0.00425626 -13.9612 < 2.2e-16 ***
## age          0.11425708  0.00496904  22.9938 < 2.2e-16 ***
## agesq       -0.00137082  0.00005695 -24.0704 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Quasi-Poisson Regression

quasipois <- glm(smoking_model, family = quasipoisson(link = 'log'), data = smoke)
coeftest(quasipois)
##
## z test of coefficients:
##
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  0.39644936  2.76738512  0.1433  0.886087
## lcigpric    -0.10596071  0.64633482 -0.1639  0.869778
## lincome      0.10372755  0.09141539  1.1347  0.256508
## restaurn    -0.36360594  0.14073479 -2.5836  0.009777 **
## white       -0.05520115  0.16867044 -0.3273  0.743462
## educ        -0.05942253  0.01918520 -3.0973  0.001953 **
## age          0.11425708  0.02239807  5.1012 3.375e-07 ***
## agesq       -0.00137082  0.00025671 -5.3400 9.292e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Overdispersion or Underdispersion?

# Extract the estimate of sigma-squared
summary(quasipois)$dispersion ## [1] 20.31782 # Now do it "by hand" yhat <- predict(pois_reg, type = 'response') uhat <- residuals(pois_reg, type = 'response') mean(uhat^2 / yhat) ## [1] 20.11488 ## Robust “Sandwich” SEs for Poisson Regression coeftest(pois_reg, vcov. = vcovHC) ## ## z test of coefficients: ## ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 0.3964494 3.0227099 0.1312 0.89565 ## lcigpric -0.1059607 0.6787042 -0.1561 0.87594 ## lincome 0.1037276 0.0844975 1.2276 0.21960 ## restaurn -0.3636059 0.1417068 -2.5659 0.01029 * ## white -0.0552011 0.1662139 -0.3321 0.73981 ## educ -0.0594225 0.0194336 -3.0577 0.00223 ** ## age 0.1142571 0.0214898 5.3168 1.056e-07 *** ## agesq -0.0013708 0.0002476 -5.5365 3.086e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## Comparing OLS and Poisson Estimates ybar <- mean(smoke$cigs)
OLS_est <- coefficients(ols)[-1]
pois_est <- coefficients(pois_reg)[-1]
cbind(OLS = OLS_est, Poisson_APE = ybar * pois_est, Poisson = pois_est)
##                   OLS Poisson_APE      Poisson
## lcigpric -0.850904380 -0.92042698 -0.105960710
## lincome   0.869014392  0.90102862  0.103727546
## restaurn -2.865621339 -3.15846053 -0.363605941
## white    -0.559236320 -0.47950441 -0.055201150
## educ     -0.501753267 -0.51617344 -0.059422535
## age       0.774502141  0.99249336  0.114257081
## agesq    -0.009068603 -0.01190761 -0.001370819

Note the age and agesq entries are not partial effects. Why not?