HT 2020

## Birthweight Dataset

Mullahy (1997; ReStat)

# Load packages for robust standard errors (install them first!)
library(sandwich)
library(lmtest)

# Load the data from the wooldridge package (install it first!)
library(wooldridge)

# View the names of the columns
names(bwght)
##  [1] "faminc"   "cigtax"   "cigprice" "bwght"    "fatheduc" "motheduc"
##  [7] "parity"   "male"     "white"    "cigs"     "lbwght"   "bwghtlbs"
## [13] "packs"    "lfaminc"

## Description of relevant variables: bwght

• motheduc motherâ€™s years of education
• white equals one if white
• faminc 1988 family income in $1000s • lfaminc log of faminc • cigs number of cigarettes smoked per day while pregnant • packs number of packes of smoked per day while pregnant ## Part (a) - Define the outcome variable smokes # Check that cigs and packs agree about who smoked during pregnancy any(bwght$cigs == 0 & bwght$packs > 0) ## [1] FALSE any(bwght$packs == 0 & bwght$cigs > 0) ## [1] FALSE # Since they agree, we can use either to define smokes bwght$smokes <- ifelse(bwght$cigs > 0, yes = 1, no = 0) ## Part (a) - Fit a Probit Regression smoking_model <- smokes ~ motheduc + white + lfaminc probit <- glm(smoking_model, family = binomial(link = 'probit'), data = bwght) coeftest(probit) ## ## z test of coefficients: ## ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 1.126273 0.250374 4.4984 6.848e-06 *** ## motheduc -0.145060 0.020697 -7.0087 2.405e-12 *** ## white 0.189677 0.110680 1.7137 0.0865758 . ## lfaminc -0.166911 0.049842 -3.3488 0.0008115 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## Part (b) - Predictions for Alice and Beth mean_faminc <- mean(bwght$faminc)
Alice <- c(motheduc = 12, white = 1, lfaminc = log(mean_faminc))
Beth <- c(motheduc = 16, white = 1, lfaminc = log(mean_faminc))
predict_me <- data.frame(rbind(Alice, Beth))
predictions <- predict(probit, newdata = predict_me, type = 'response')
predictions
##      Alice       Beth
## 0.16183185 0.05853452
diff(predictions)
##       Beth
## -0.1032973

## Part (c) - Average partial effect of lfaminc

# Average of g(x'beta_hat) where g is the std. normal density: dnorm
# (predict defaults to the scale of x'beta_hat)
mean(dnorm(predict(probit))) * coef(probit)[4]
##     lfaminc
## -0.03614676

## Part (d) - Pseudo R-squared

# Fit model with only an intercept
model0 <- smokes ~ 1
probit0 <- glm(model0, family = binomial(link = 'probit'), data = bwght)

# Pseudo R-squared
1 - logLik(probit) / logLik(probit0)
## 'log Lik.' 0.07838101 (df=4)