# Data Generation

## strength

#packages used below: dplyr (pipes, mutate/select), purrr (rdunif),
#glasso and igraph (graphical lasso section)
library(dplyr)
library(purrr)
library(glasso)
library(igraph)

set.seed(123)
f_e=data.frame(strength=rnorm(256,62,6),label='female')
m_e=data.frame(strength=rnorm(544,68,10),label='male') #males have higher strength on average

## Background variables

set.seed(123)
f_e$householdIncome=rnorm(256,80000,15000)
f_e$age=rdunif(256,b=55,a=18)
m_e$householdIncome=rnorm(544,80000,10000)
m_e$age=rdunif(544,b=60,a=16)
set.seed(123)
fedu_u=rdunif(256,b=2,a=-2)
f_e$edu=rpois(256,lambda=f_e$householdIncome*0.0001+fedu_u) #poisson dist. since discrete
medu_u=rdunif(544,b=2,a=-2)
m_e$edu=rpois(544,lambda=m_e$householdIncome*0.00008+medu_u)

7 = college degree. Females: compared to the male group, more are around or above the college level, due to stricter self-selection affected by female mentality. Males: mostly around the high-school level, due to looser self-selection affected by male mentality. The error terms characterize employees' self-made merit.
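
A quick tabulation (a sketch, not part of the original pipeline) shows how the simulated education levels differ by group:

table(f_e$edu) #female education counts
table(m_e$edu) #male education counts
c(female = mean(f_e$edu), male = mean(m_e$edu)) #females should average higher under this design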

set.seed(123)
frace_u=rdunif(256,b=3,a=-3)
flambda=f_e$householdIncome*0.000003+frace_u
for(i in 1:length(flambda)){
  if(flambda[i]<0){
    flambda[i]=0
  }
}
f_e$race=rpois(256,lambda=flambda) #poisson distribution since discrete
mrace_u=rdunif(544,b=3,a=-3)
mlambda=m_e$householdIncome*0.000005+mrace_u
for(i in 1:length(mlambda)){
  if(mlambda[i]<0){
    mlambda[i]=0
  }
}
m_e$race=rpois(544,lambda=mlambda)

1: white, 2: black, 3: Latinx, 4: Asian. Mostly white in both groups.
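
A quick look at the simulated race codes (a sketch added here, not part of the original code; note that rpois can also return 0 or values above 4, outside this legend):

table(f_e$race) #female race counts
table(m_e$race) #male race counts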

## proxy: height

By common sense, height is correlated with strength, age, and race. The proxy is biased: females and males have different functions for height.

set.seed(123)
f_u=rnorm(256,0,4)
for (i in 1:nrow(f_e)){
  f_e$height[i]=45+f_e$strength[i]^2*0.002+f_e$age[i]*0.12+f_e$race[i]^(0.5)*0.001+f_u[i]
}
m_u=rnorm(544,0,4)
for (i in 1:nrow(m_e)){
  m_e$height[i]=50+m_e$strength[i]^2*0.003+m_e$age[i]*0.11+m_e$race[i]^(0.5)*0.001+m_u[i]
}
employee=rbind(f_e,m_e)
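
As a quick sanity check (a sketch, not part of the original code), group means confirm the built-in differences in strength and height:

employee %>%
  group_by(label) %>%
  summarise(mean_strength = mean(strength), mean_height = mean(height), n = n())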

# IAT

Expected: g_binary in the second step is significant, since the true $\beta_1$ is not zero.

cut_employee = employee %>% mutate(g_binary = if_else(label == 'female',1,0))
first_step=lm(height~strength, data=cut_employee)
summary(first_step)

Call:
lm(formula = height ~ strength, data = cut_employee)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.1042 -2.5862  0.4193  2.5013  8.2949 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.21384    0.81208   5.189 2.68e-07 ***
strength     0.91522    0.01215  75.339  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.199 on 798 degrees of freedom
Multiple R-squared:  0.8767,    Adjusted R-squared:  0.8766 
F-statistic:  5676 on 1 and 798 DF,  p-value: < 2.2e-16
cut_employee$resid=first_step$residuals
second_step=lm(resid~g_binary, data=cut_employee)
summary(second_step)

Call:
lm(formula = resid ~ g_binary, data = cut_employee)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.6131 -1.3449 -0.1628  1.2295  6.4535 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.84134    0.07436   24.76   <2e-16 ***
g_binary    -5.75418    0.13145  -43.78   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.734 on 798 degrees of freedom
Multiple R-squared:  0.706, Adjusted R-squared:  0.7056 
F-statistic:  1916 on 1 and 798 DF,  p-value: < 2.2e-16

$\beta_0$ (the coefficient on strength from the first step) is 0.91522, and $\beta_1$ (the coefficient on g_binary from the second step) is -5.75418 (these will be compared to the MLR estimates later).
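
The same estimates can be pulled from the fitted objects programmatically, together with a confidence interval for the gender effect (a minimal sketch using first_step and second_step from above):

coef(first_step)["strength"] #beta_0 from the first step
coef(second_step)["g_binary"] #beta_1 from the second step
confint(second_step, "g_binary") #interval for the gender effect on the residuals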

# MLR

Expected: different $\beta_0$ and $\beta_1$; the coefficient on g_binary is significant.

mlr_employee = employee %>% mutate(g_binary = if_else(label == 'female',1,0)) %>% select(-label)
mlr=lm(height~., data=mlr_employee)
summary(mlr)

Call:
lm(formula = height ~ ., data = mlr_employee)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.89707 -0.27005 -0.07324  0.14183  2.16125 

Coefficients:
                  Estimate Std. Error  t value Pr(>|t|)    
(Intercept)      7.094e+00  1.484e-01   47.819   <2e-16 ***
strength         8.124e-01  1.738e-03  467.289   <2e-16 ***
householdIncome  2.068e-05  1.388e-06   14.897   <2e-16 ***
age              1.137e-01  1.199e-03   94.880   <2e-16 ***
edu              6.245e-03  4.891e-03    1.277    0.202    
race            -1.317e-03  9.309e-03   -0.141    0.888    
g_binary        -6.188e+00  3.447e-02 -179.513   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.421 on 793 degrees of freedom
Multiple R-squared:  0.9979,    Adjusted R-squared:  0.9979 
F-statistic: 6.214e+04 on 6 and 793 DF,  p-value: < 2.2e-16

$\beta_0$ is 0.8124, and $\beta_1$ is -6.188.
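
For a side-by-side comparison, both sets of estimates can be collected from the fitted models (a sketch assuming first_step, second_step, and mlr are still in the workspace):

data.frame(
  method = c("IAT","MLR"),
  beta_0 = c(coef(first_step)[["strength"]], coef(mlr)[["strength"]]),
  beta_1 = c(coef(second_step)[["g_binary"]], coef(mlr)[["g_binary"]])
)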

# GLASSO

gl_employee=mlr_employee
mat=as.matrix(gl_employee) #nxp matrix
mat=scale(mat) #standardize columns so varCov is a correlation matrix
varCov=var(mat) #variance-covariance matrix (non-sparse)
lambdas = seq(0.05,0.33,0.025)
lambdass = c(0.001, lambdas,1000) #add a near-zero and a very large penalty as endpoints
for (i in 1:length(lambdass)){
  glasso_m=glasso(varCov, rho = lambdass[i]) #sparse estimate of the precision matrix at this penalty
  rownames(glasso_m$wi)=colnames(gl_employee)
  colnames(glasso_m$wi)=colnames(gl_employee)
  gl=graph_from_adjacency_matrix(glasso_m$wi, weighted=TRUE, mode="upper", diag=FALSE, add.rownames="code")
  set.seed(123)
  coords <- layout.reingold.tilford(gl)
  plot(gl, layout = coords) #one graph per penalty; edges disappear as rho grows
}

lambdass
 [1]    0.001    0.050    0.075    0.100    0.125    0.150    0.175    0.200
 [9]    0.225    0.250    0.275    0.300    0.325 1000.000
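
Rather than reading edges off the plots, the estimated precision matrix at one moderate penalty can be printed directly (a sketch at rho = 0.1, one of the values above; zero off-diagonal entries mean the corresponding pair is conditionally independent in the estimate):

glasso_mid = glasso(varCov, rho = 0.1)
rownames(glasso_mid$wi) = colnames(gl_employee)
colnames(glasso_mid$wi) = colnames(gl_employee)
round(glasso_mid$wi, 3)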