Forum: help

RE: Problem modeling dispersion of random effects [ Reply ]
By: Lars Ronnegard on 2016-12-16 14:19

I suggest you check the dimensions of all the design/incidence matrices that you have specified. The following example code worked fine for me.

I hope you find this example useful.

/Lars

###############
set.seed(1234)
N=200
k1=20
m1 = N/k1
k2=40
m2 = N/k2
k1.disp=5
k2.disp=5
X1_disp <- diag(k1.disp)%x%rep(1,k1/k1.disp)
eta1 <- X1_disp%*%rnorm(k1.disp)
X2_disp <- diag(k2.disp)%x%rep(1,k2/k2.disp)
eta2 <- X2_disp%*%rnorm(k2.disp)
u1 <- rnorm(k1, 0, exp(eta1))
u2 <- rnorm(k2, 0, exp(eta2))
Z1 <- diag(k1)%x%rep(1,m1)
Z2 <- diag(k2)%x%rep(1,m2)
y <- Z1%*%u1 + Z2%*%u2 + rnorm(N)
X <- matrix(1,N,1)
library(hglm)
mod1 <- hglm(y=y, X=X, Z=cbind(Z1,Z2), RandC = c(k1,k2)) #Model with homogeneous variances
mod2 <- hglm(y=y, X=X, Z=cbind(Z1,Z2), RandC = c(k1,k2), X.rand.disp=list(X1_disp, X2_disp))
summary(mod2)
###########

RE: Problem modeling dispersion of random effects [ Reply ]
By: Mauricio Bucca on 2016-12-16 00:02

[forum:43756]

Hi Lars,
I'm sorry to bother you with these questions. I'm saw the videos and I've read the tutorial, but so far I haven't find a solution to my problem. For some reason, when I try to model the dispersion of two (nested) random effects, I can't get it to work. With one random effect works perfectly fine, but I does not with 2. I assume I'm writing something incorrectly in the code, but I haven't been able to figure it out.

I hope you could help me out. Thanks!

This is my code:

# Model estimated with lmer

model <- lmer( Outcome ~ (1| M_ID/Child_ID) + Child_IAge + I(Child_IAge^2) + year, REML=TRUE, data=subdata); summary(model)

# Design matrix from lmer (includes the two random components)

Z<- t(do.call(Matrix::rBind,getME(model,"Ztlist")))

# number of columns used by each random effect
nG1 <- length(table(model@flist$`Child_ID:M_ID`))
nG2 <- length(table(model@flist$`M_ID`))

# Individual and family level predictors

# Family's race
Race_M_ID <- round(aggregate(subdata$Child_IRace~subdata$M_ID, FUN=mean),0)
Race_M_ID <- as.matrix(Race_M_ID[,2])

# Child's race and sex
Race_Child_ID <- aggregate(subdata$Child_IRace~subdata$Child_ID, FUN=mean)
Race_Child_ID <- as.matrix(Race_Child_ID[,2])

Sex_Child_ID <- aggregate(subdata$Child_Sex_Individual~subdata$Child_ID, FUN=mean)
Sex_Child_ID <- as.matrix(Sex_Child_ID[2])

# model matrix fixed effect
X = model.matrix(~ subdata$Child_IAge + subdata$year)

Z1 = Z[,1:nG1]
Z2 = Z[,nG1+1:nG2]

# Modeling all variance components

##### WORKS WITH 1 RANDOM EFFECT

mod2 <- hglm(X = X, Z = Z1, y = subdata$Outcome,
family = gaussian(), rand.family = gaussian(),
method = "EQL", RandC = c(nG1),
X.disp = model.matrix(~ factor(subdata$Child_IRace)*factor(subdata$Child_Sex_Individual)), link.disp ="log",
X.rand.disp = list(model.matrix(~ factor(Race_Child_ID)*factor(Sex_Child_ID))),
verbose= TRUE, conv = 1e-6, maxit = 100, data = subdata)

summary(mod2)

##### DOESNT WORKS WITH 2 RANDOM EFFECTS

mod2 <- hglm(X = X, Z = cbind(Z1,Z2), y = subdata$Outcome,
family = gaussian(), rand.family = list(gaussian(), gaussian()),
method = "EQL", RandC = c(nG1,nG2),
X.disp = model.matrix(~ factor(subdata$Child_IRace)*factor(subdata$Child_Sex_Individual)), link.disp ="log",
X.rand.disp = list(model.matrix(~ factor(Race_Child_ID)*factor(Sex_Child_ID)), model.matrix(~ factor(Race_M_ID)) ),
verbose= TRUE, conv = 1e-6, maxit = 100, data = subdata)

summary(mod2)

RE: Problem modeling dispersion of random effects [ Reply ] By: Lars Ronnegard on 2016-12-15 07:37	[forum:43754]
Mauricio I suggest you have a look at the tutorial videos on YouTube at http://www.youtube.com/playlist?list=PLn1OmZECD-n15vnYzvJDy5GxjNpVV5Jr8 I think these will clarify how to specify the input options. /Lars

RE: Problem modeling dispersion of random effects [ Reply ]
By: Mauricio Bucca on 2016-12-14 14:51

[forum:43753]

Thank you very much Lars. It worked perfectly,

Now, I have a final question I hope you could help me with. I'm a trying to fit a hierarchical model with 3 levels (individual-year,individual,family) and I want to model dispersion in all levels. However, I'm a not quite sure how I should specify this in the code. I'm also not sure about how to add the design matriz for the random effects.

This is what I'm doing:

# =========== Load data

setwd(path.data)
name <- paste("myPSID","dta", sep=".")
DATA <- read.dta("myPSID_sibcorr.dta")

setwd(path.analysis)
source("themeggplot.R")

## Creates a subset of the data for analysis and keep only observations with no-missing data in the dependent variables ##

Data <- DATA #subset of DATA
Data <- Data[order(Data$M_ID, Data$Child_ID),] #order
Data[,"Outcome"] <- NA
Data$Outcome <- log(DATA$Child_adj_Y_Family_)
Data$Outcome[Data$Outcome ==-Inf] <- NA
Data <- Data[which(Data$Outcome!="NA"), ] # keeps only vars with info in the outcome variables

subdata <- subdata[c("Outcome","Child_IRace", "Child_IAge", "year", "M_ID", "Child_ID")] # Subset of vars to use
subdata <- na.omit(subdata) # keeps only complete information observations

#============= Fit the model with with lmer

model <- lmer( Outcome ~ (1| M_ID/Child_ID) + Child_IAge + I(Child_IAge^2) + year, REML=TRUE, data=subdata); summary(model)

#============= Extracts Design matriz random effects from model above

Z = t(do.call(Matrix::rBind,getME(model,"Ztlist")))

#========= Creates variables to use as predictors of the random effects variances

# Family level

Race_M_ID <- data.frame(round(aggregate(subdata$Child_IRace~subdata$M_ID, FUN=mean),0))
Race_M_ID <- Race_M_ID[,2]

# Individual level

Race_Child_ID <- data.frame(aggregate(subdata$Child_IRace~subdata$Child_ID, FUN=mean))
Race_Child_ID <- Race_Child_ID[,2]

#========= Try to model everything with hglm. NOT WORKING

mod2 <- hglm(X = model.matrix(~ subdata$Child_IAge + subdata$year ), Z = Z , y = subdata$Outcome, family = gaussian(link = identity),
rand.family = gaussian(link = identity), method = "EQL",
conv = 1e-6, maxit = 1000, fixed = y ~ 1,
random = ~ 1 | M_ID/Child_ID, X.disp = model.matrix(~ factor(subdata$Child_IRace)), link.disp = "log",
rand.disp = list( model.matrix(~ factor(Race_M_ID)) , model.matrix(~ factor(Race_Child_ID)) ), link.rand.disp = "log",
verbose= TRUE, data = subdata)
summary(mod2)

BEST and I'd really appreciate any hint on how to proceed.

RE: Problem modeling dispersion of random effects [ Reply ]
By: Lars Ronnegard on 2016-12-13 13:53

[forum:43745]

You want to fit both a linear predictor for the dispersion parameter (residual variance) and a linear predictor for the variance of the random effects. The following works:
###########
mod2 <- hglm(X = matrix(rep(1,n*k)), y = Y, Z = (I.k%x%One.n), family = gaussian(link = identity),
rand.family = gaussian(link = identity), method = "EQL",
conv = 1e-6, maxit = 1000, X.disp = model.matrix(~ Z), X.rand.disp = list(model.matrix(~ z)))
summary(mod2)
##################

Note that you need to specify X.rand.disp as a list and the reason for this is that you may have several variance components to fit. In your example you only have one.

Hope you find this useful,
Lars

Problem modeling dispersion of random effects [ Reply ]
By: Mauricio Bucca on 2016-12-13 05:55

[forum:43744]

Hi,

I'm trying to model the dispersion of both the residual error and the random effects, but I can't get it to work when I add the design matrix for the fixed effects in the random effects dispersion part of the model. If I only model the residual variance it works perfectly.

The error message I'm getting is the following:
Error in model.frame.default(formula = devu/hvu ~ X.rand.disp[[K]] - 1, :
variable lengths differ (found for 'X.rand.disp[[K]]')

The following is the code to produce the simulated data I'm using and the estimation:

######### Simulated Data ############

N=1 # Number of simulations
n=3 # Number of individuals by group
k=50 # Number of groups

One.k=matrix(rep(1,k))
One.n=matrix(rep(1,n))
I.k=diag(k)
I.n=diag(n)
mu=0

z = runif(k,1,5) # indep variable in var function at group level
Z = (I.k%x%One.n)%*%z # indep variable in var function at individual level

LAMBDA_a0 = -5
LAMBDA_a = 1.3
var.a = rlnorm(k, LAMBDA_a0 + LAMBDA_a*z,0.0001); min(var.a); max(var.a)
sd.a = sqrt(var.a)

LAMBDA_e0 = 5
LAMBDA_e = -4.2
var.e = rlnorm(k*n, LAMBDA_e0 + LAMBDA_e*Z,0.0001); min(var.e); max(var.e)
sd.e = sqrt(var.e)

a=rnorm(k,0,sd.a)
e=rnorm(k*n,0,sd.e)

for (i in 1:N) {

indicator=matrix(rep(1:k)) # vector with group indicator for each group
group=(I.k%x%One.n)%*%indicator # vector with group indicator for each indiv

Y=(One.k%x%One.n)*mu + (I.k%x%One.n)%*%a + (I.k%x%I.n)%*%e
# Creates Y = mu + random intercept+ random error

}

######### Estimation ############

# DOESNT WORK
hglm(X = matrix(rep(1,n*k)), y = Y, Z = (I.k%x%One.n), family = gaussian(link = identity),
rand.family = gaussian(link = identity), method = "EQL",
conv = 1e-6, maxit = 1000, fixed = y ~ 1,
random = ~ 1 | group, X.disp = model.matrix(~ Z), disp = ~ 1 + Z, link.disp = "log",
X.rand.disp = model.matrix(~ Z), rand.disp = ~ 1 + Z, link.rand.disp = "log",
verbose= TRUE, data = Data)

# THIS WORKS (only models residual variance):

hglm(X = matrix(rep(1,n*k)), y = Y, Z = (I.k%x%One.n), family = gaussian(link = identity),
rand.family = gaussian(link = identity), method = "EQL",
conv = 1e-6, maxit = 1000, fixed = y ~ 1,
random = ~ 1 | group, X.disp = model.matrix(~ Z), disp = ~ 1 + Z, link.disp = "log",
verbose= TRUE, data = Data)