nysr: Adolescent Work Intensity and Substance Use

Description

NYSR data on adolescent work intensity and substance use.

Usage

data("nysr")

Arguments

Format

A data frame with 2816 observations on the following 18 variables.

IDS: NYSR identification number
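intense: Based on the question "During the school year, about how many hours per week did you normally work at a paid job, or did you not have a job". "Never": student did not have a job; "Moderate": 1-19 hours; "Intense": >=20 hours. In the construction code under Details, students with moderate work intensity are excluded from the sample, and intense is coded 1 for "Intense" and 0 otherwise.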
family.income: Household income with 5000 = (between 0-10,000), 15000 = (between 10,000 and 20,000), ..., 95000 = (between 90,000 and 100,000) and 105000 = (above 100,000).
family.income.impute: Household income with 5000 = (between 0-10,000), 15000 = (between 10,000 and 20,000), ..., 95000 = (between 90,000 and 100,000) and 105000 = (above 100,000). For subjects with missing family income, the mean is imputed.
family.income.mis: Dummy variable for whether household income is missing and the mean is imputed.
family.structure: "Two Parent Biological": both biological father and mother living with child; "Two Parent Nonbiological": someone assuming a mother role (biological, adoptive, stepparent) living with a husband who assumes a father role (biological, adoptive, stepparent) where not both parents are biological; "Single Parent/Other": any other living situation for child. In the construction code under Details, both two-parent categories are coded 1 and "Single Parent/Other" is coded 0.
highest.education.parent.in.household: Maximum education level of household resident who assumes a mother role (biological, adoptive, stepparent) and household resident who assumes a father role (biological, adoptive, stepparent). If the child is living with a single parent, then this is just the education level of that single parent.
highest.education.parent.in.household.impute: Maximum education level of household resident who assumes a mother role (biological, adoptive, stepparent) and household resident who assumes a father role (biological, adoptive, stepparent). If the child is living with a single parent, then this is just the education level of that single parent. For subjects with missing highest education of parent in household, the mean is imputed.
highest.education.parent.in.household.mis: Dummy variable for whether highest education of parent in household is missing and the mean is imputed.
female: 1 = female, 0 = male
race.black: 1=black race, 0=other
race.hispanic: 1=hispanic race, 0=other
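age.teenager: Age of teenager. In the construction code under Details, a missing age (coded 888) is set to NA and subjects with missing age are excluded from the sample; the mean-imputation step for age is commented out.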
school.dropout: Dummy variable of whether student has dropped out of school
alcohol.use: Based on the question "How often, if at all, do you drink alcohol, such as beer, wine or mixed drinks, not including at religious services". "Never": answered "Never"; "Moderate": answered "A few times a year" or "About once a month"; "Heavy": answered "A few times a month", "About once a week", "A few times a week" or "Almost every day".
marijuana.use: Based on the question "How often, if ever, have you used marijuana?". "Never": answered "Never"; "Experimenter": answered "You tried it once or twice"; "Continual User": answered "You use it occasionally" or "You use it regularly".
p: Propensity score.
plogit: Logit of propensity score.

Details

The following code constructed the data as used here. wave1data$family.income=rep(NA,nrow(wave1data)) wave1data$family.income[wave1data$PINCOME1==1 & wave1data$PINCOME2==4]=5000 wave1data$family.income[wave1data$PINCOME1==1 & wave1data$PINCOME2==3]=15000 wave1data$family.income[wave1data$PINCOME1==1 & wave1data$PINCOME2==2]=25000 wave1data$family.income[wave1data$PINCOME1==1 & wave1data$PINCOME2==1]=35000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==1]=45000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==2]=55000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==3]=65000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==4]=75000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==5]=85000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==6]=95000 wave1data$family.income[wave1data$PINCOME1==2 & wave1data$PINCOME3==7]=105000 # For subjects with missing family income data, fill in mean and create a missing data indicator wave1data$family.income.mis=is.na(wave1data$family.income) #wave1data$family.income[wave1data$family.income.mis==1]=mean(wave1data$family.income,na.rm=TRUE)

# Find family structure variable wave1data$family.structure=rep(NA,nrow(wave1data)) # wave1data$family.structure[wave1data$PMOTHER==1 & wave1data$PLIVE==1 & wave1data$PSPRELAT==1]="Two Parent Biological" # wave1data$family.structure[wave1data$PMOTHER==1 & wave1data$PLIVE==2 & wave1data$PPARTPAR==1]="Two Parent Biological" # wave1data$family.structure[wave1data$PFATHER==1 & wave1data$PLIVE==1 & wave1data$PSPRELAT==1]="Two Parent Biological" # wave1data$family.structure[wave1data$PFATHER==1 & wave1data$PLIVE==2 & wave1data$PPARTPAR==1]="Two Parent Biological" # wave1data$family.structure[wave1data$PMOTHER==1 & (wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]="Two Parent Nonbiological" # wave1data$family.structure[wave1data$PFATHER==1 & (wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]="Two Parent Nonbiological" # wave1data$family.structure[(wave1data$PMOTHER==2 | wave1data$PMOTHER==3) & (wave1data$PSPRELAT==1 | wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]="Two Parent Nonbiological" # wave1data$family.structure[(wave1data$PFATHER==2 | wave1data$PFATHER==3) & (wave1data$PSPRELAT==1 | wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]="Two Parent Nonbiological" # wave1data$family.structure[is.na(wave1data$family.structure)]="Single Parent/Other"

wave1data$family.structure[wave1data$PMOTHER==1 & wave1data$PLIVE==1 & wave1data$PSPRELAT==1]=1 wave1data$family.structure[wave1data$PMOTHER==1 & wave1data$PLIVE==2 & wave1data$PPARTPAR==1]=1 wave1data$family.structure[wave1data$PFATHER==1 & wave1data$PLIVE==1 & wave1data$PSPRELAT==1]=1 wave1data$family.structure[wave1data$PFATHER==1 & wave1data$PLIVE==2 & wave1data$PPARTPAR==1]=1 wave1data$family.structure[wave1data$PMOTHER==1 & (wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]=1 wave1data$family.structure[wave1data$PFATHER==1 & (wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]=1 wave1data$family.structure[(wave1data$PMOTHER==2 | wave1data$PMOTHER==3) & (wave1data$PSPRELAT==1 | wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]=1 wave1data$family.structure[(wave1data$PFATHER==2 | wave1data$PFATHER==3) & (wave1data$PSPRELAT==1 | wave1data$PSPRELAT==2 | wave1data$PSPRELAT==3)]=1 wave1data$family.structure[is.na(wave1data$family.structure)]=0

# Highest parent education in household dadeductemp=rep(NA,nrow(wave1data)) dadeductemp[wave1data$PDADEDUC==0 | wave1data$PDADEDUC==1 | wave1data$PDADEDUC==2]=0 dadeductemp[wave1data$PDADEDUC==3 | wave1data$PDADEDUC==4 | wave1data$PDADEDUC==5 | wave1data$PDADEDUC==7]=1 dadeductemp[wave1data$PDADEDUC==6 | wave1data$PDADEDUC==8]=2 dadeductemp[wave1data$PDADEDUC==9 | wave1data$PDADEDUC==10]=3 dadeductemp[wave1data$PDADEDUC>=11 & wave1data$PDADEDUC<=14]=4 momeductemp=rep(NA,nrow(wave1data)) momeductemp[wave1data$PMOMEDUC==0 | wave1data$PMOMEDUC==1 | wave1data$PMOMEDUC==2]=0 momeductemp[wave1data$PMOMEDUC==3 | wave1data$PMOMEDUC==4 | wave1data$PMOMEDUC==5 | wave1data$PMOMEDUC==7]=1 momeductemp[wave1data$PMOMEDUC==6 | wave1data$PMOMEDUC==8]=2 momeductemp[wave1data$PMOMEDUC==9 | wave1data$PMOMEDUC==10]=3 momeductemp[wave1data$PMOMEDUC>=11 & wave1data$PMOMEDUC<=14]=4 parents.highest.educ=pmax(dadeductemp,momeductemp,na.rm=TRUE) # wave1data$highest.education.parent.in.household=rep(NA,nrow(wave1data)) # wave1data$highest.education.parent.in.household[parents.highest.educ==0]="Less than high school" # wave1data$highest.education.parent.in.household[parents.highest.educ==1]="High school degree" # wave1data$highest.education.parent.in.household[parents.highest.educ==2]="AA/vocational degree" # wave1data$highest.education.parent.in.household[parents.highest.educ==3]="BA/BS degree" # wave1data$highest.education.parent.in.household[parents.highest.educ==4]="Higher degree" # wave1data$highest.education.parent.in.household[is.na(parents.highest.educ)]="Missing"

wave1data$highest.education.parent.in.household=rep(NA,nrow(wave1data)) wave1data$highest.education.parent.in.household[parents.highest.educ==0]=0 wave1data$highest.education.parent.in.household[parents.highest.educ==1]=1 wave1data$highest.education.parent.in.household[parents.highest.educ==2]=1 wave1data$highest.education.parent.in.household[parents.highest.educ==3]=2 wave1data$highest.education.parent.in.household[parents.highest.educ==4]=3 #wave1data$highest.education.parent.in.household[is.na(parents.highest.educ)]=mean(parents.highest.educ,na=T) wave1data$highest.education.parent.in.household.mis=is.na(parents.highest.educ)

# Gender of teenager wave1data$gender=rep(NA,nrow(wave1data)) #wave1data$gender[wave1data$TEENSEX==0]="MALE" #wave1data$gender[wave1data$TEENSEX==1]="FEMALE" wave1data$female=wave1data$TEENSEX

# Race/ethnicity of teenager wave1data$race.ethnicity=rep(NA,nrow(wave1data)) # wave1data$race.ethnicity[wave1data$TEENRACE==1]="White/Other" # wave1data$race.ethnicity[wave1data$TEENRACE==2]="African American" # wave1data$race.ethnicity[wave1data$TEENRACE==3]="Hispanic" # wave1data$race.ethnicity[wave1data$TEENRACE>=4]="White/Other"

wave1data$race.black=wave1data$TEENRACE==2 wave1data$race.hispanic=wave1data$TEENRACE==3

# Age of teenager wave1data$age.teenager=wave1data$AGE wave1data$age.missing=(wave1data$AGE==888) # Fill in mean value for teenager with missing age wave1data$age.teenager[wave1data$AGE==888]=NA #wave1data$age.teenager[is.na(wave1data$age.teenager)]=mean(wave1data$age.teenager,na.rm=TRUE)

# Has student dropped out of school wave1data$school.dropout=(wave1data$PSCHTYP==4)

# Work intensity (intensity of adolescent employment) wave1data$work.intensity=rep(NA,nrow(wave1data)) wave1data$work.intensity[wave1data$WORKHRS==0]="Nonworker" # Intense: >=20 hours wave1data$work.intensity[wave1data$WORKHRS>=1 & wave1data$WORKHRS<20]="Moderate" wave1data$work.intensity[wave1data$WORKHRS>=20 & wave1data$WORKHRS<200]="Intense"

# Alcohol use wave1data$alcohol.use=rep(NA,nrow(wave1data)) wave1data$alcohol.use[wave1data$DRINK==7]="Never" wave1data$alcohol.use[wave1data$DRINK==5 | wave1data$DRINK==6]="Moderate" wave1data$alcohol.use[wave1data$DRINK<=4]="Heavy"

# Marijuana use wave1data$marijuana.use=rep(NA,nrow(wave1data)) wave1data$marijuana.use[wave1data$POT==1]="Never" wave1data$marijuana.use[wave1data$POT==2]="Experimenter" wave1data$marijuana.use[wave1data$POT==3 | wave1data$POT==4]="Continual User"

## Drop from consideration for matching fifth and sixth graders; students missing work intensity, alcohol use and marijuana use; students with moderate working intensity wave1data$not.included.in.sample=(wave1data$PSCHGRA2==5 | wave1data$PSCHGRA2==6 | wave1data$age.missing==TRUE | is.na(wave1data$work.intensity) | is.na(wave1data$alcohol.use) | is.na(wave1data$marijuana.use) | wave1data$work.intensity=="Moderate")

# Create variable which identifies whether wave 1 interview exists for subject interviewerdata=read.csv("C:/Users/ruoqi/Desktop/Penn/research/Dylan-ThickDescription/ivlink.csv") wave1interviews=interviewerdata$ids[!(interviewerdata$iver=="W3" | interviewerdata$iver=="W4")] wave1data$wave1.interview=wave1data$IDS %in% wave1interviews

wave1data$wave1.interview=wave1data$wave1.interview& (!wave1data$family.income.mis) & (!wave1data$highest.education.parent.in.household.mis)

data=wave1data dsub=data[which(data$not.included.in.sample==FALSE),] dim(dsub) #2816 932 dsub=dsub[which(dsub$work.intensity!='Moderate'),] dim(dsub) # 2816 932 dsub$intense=rep(0,dim(dsub)[1]) dsub$intense[which(dsub$work.intensity=='Intense')]=1

#propensity score dsub$family.income.impute=dsub$family.income dsub$family.income.impute[dsub$family.income.mis==1]=mean(dsub$family.income,na.rm=TRUE) dsub$highest.education.parent.in.household.impute=dsub$highest.education.parent.in.household dsub$highest.education.parent.in.household.impute[dsub$highest.education.parent.in.household.mis==1]=mean(dsub$highest.education.parent.in.household,na.rm=T) model<-glm(intense~family.income.impute+family.income.mis+ highest.education.parent.in.household.impute+highest.education.parent.in.household.mis+ female+race.black+race.hispanic+age.teenager+school.dropout, family=binomial(link='logit'),data=dsub,x=TRUE)

x=subset(dsub[c('family.income.impute','family.income.mis','family.structure', 'highest.education.parent.in.household.impute','highest.education.parent.in.household.mis', 'female','race.black','race.hispanic','age.teenager','school.dropout')]) pred <- predict(model, newdata = x, type = 'response') dsub$p=pred dsub$plogit=car::logit(pred) #boxplot(prop~intense,data=dsub) dsub=subset(dsub[c('IDS','intense','family.income','family.income.impute','family.income.mis','family.structure', 'highest.education.parent.in.household','highest.education.parent.in.household.impute','highest.education.parent.in.household.mis', 'female','race.black','race.hispanic','age.teenager','school.dropout','alcohol.use','marijuana.use','p','plogit')]) nysr=dsub save(nysr, file = "nysr.rda")

References

Longest, K. C. and Shanahan, M. J. (2007). Adolescent Work Intensity and Substance Use: The Mediational and Moderational Roles of Parenting. Journal of Marriage and Family, Vol. 69, No. 3, pp. 703-720.

Examples

Run this code

# NOT RUN {
data("nysr")
summary(nysr)
# }

Run the code above in your browser using DataLab