This vignette illustrates the use of the funqtl package for QTL mapping with function-valued traits using functional principal component analysis and multi-trait mapping.
We will consider the analysis of a simulated data set, simspal
. There are 162 recombinant inbred lines typed at a total of 234 markers on 5 chromosomes.
We first load the funqtl package (which will also load R/qtl and other packages) and the data.
library(funqtl)
data(simspal)
Here’s a quick summary of the data.
summary(simspal)
## RI strains via selfing
##
## No. individuals: 162
##
## No. phenotypes: 241
## Percent phenotyped: 100
##
## No. chromosomes: 5
## Autosomes: 1 2 3 4 5
##
## Total markers: 234
## No. markers: 26 42 64 35 67
## Percent genotyped: 98.6
## Genotypes (%): AA:49.8 BB:50.2
The following plots the genetic marker map and the function-valued trait for five of the RIL.
par(mfrow=c(1,2))
plotMap(simspal, main="")
plot(1:241, simspal$pheno[160,], type="l", xlab="Time",
ylim=c(-120,0), ylab="Root Tip Angle (degrees)")
ind <- c(19, 20, 132, 72)
color <- c("blue", "red", "green", "orange")
for(i in seq(along=ind))
lines(1:241, simspal$pheno[ind[i],], col=color[i])
There are 162 individuals. The angle of the root tim is measured for 241 time points, which are measured every 2 minutes for eight hours.
out <- scanone(simspal, pheno.col=1:241, method="hk")
eff <- geteffects(simspal, pheno.cols=1:241)
nam <- phenames(simspal)
y <- as.numeric(substr(nam, 2, nchar(nam)))/60
plotlod(out, eff, y, gap=15, horizontal = T)
This Figure shows the image of signed lod scores for each time points. The x-axis represent time points and y-axis represent chromosome, lod score represented as tone of the color. Red indicate positive effects and blue color indicate negative effects.
Here, we’ve got lod score for all 241 time points. Our idea is to reduce the dimension of 241 to fewer number.
cvfold()
function.set.seed(1)
cvout <- cvfold(simspal, basisset = 15:60, fold = 10, random = F)
The 1st step is to decide how many basis functions to use estimating our function-valued trait. If we use too much basis it will fit the measurement error as well which is not meaningful. If we use too few basis the estimation may not enough to represent the truth.
We’ve developed cvfold
function to decide the number of basis functions. basisset
option describe the number of basis sets to be tested. fold
indicate how many folders to be used in cross validation. In this code, we divide our time points with equily spaced 10 sets. We used 9 folder to evaluate the functional. And calculated sum of squared errors for the left one folder.
The Figure shows the sum of error losses (y- axis) for each number of basis from 15 to 60.
plot(15:60, cvout, xlab = "The number of basis",
main = "sum of squared errors", type = "l")
which(cvout == min(cvout))
## 41
## 27
The minimum error loss calculated when we used 41 basis. So we decided to use 41 B-spline basis functions.
cfpc <- calcfunpca(simspal, criteria=0.99, nbasis = 41)
Y <- cfpc$Y
eigfc <- cfpc$eigf
dim(Y)
## [1] 162 4
Y[1:10,]
## PC1 PC2 PC3 PC4
## [1,] -903.7400 221.8631 -107.06066 -120.1815
## [2,] -1229.2545 171.1248 -95.29325 -140.7525
## [3,] -1246.8675 227.2852 -158.71505 -133.5347
## [4,] -1327.3137 179.6724 -114.09467 -124.0108
## [5,] -1081.8138 153.8482 -145.80452 -146.7473
## [6,] -1170.0532 187.0432 -112.30235 -132.0673
## [7,] -983.8873 262.9096 -128.67069 -115.6567
## [8,] -1155.9584 220.7031 -155.52029 -125.4240
## [9,] -1001.6113 286.1447 -138.34373 -120.9453
## [10,] -1024.5831 267.8272 -123.80506 -171.3865
Now, we can used calcfunpca
function to get small number of principal components.
41 B-spline basis functions are used. the number of 4 PCs are selected they explain more than 99 percent of data variation.
To convert those PCs back to original data information, we can use ‘eigfc’ object.
# PC for 1st individual
Y[1,]
## PC1 PC2 PC3 PC4
## -903.7400 221.8631 -107.0607 -120.1815
# plot the 1st individual ( raw data)
plot(1:241,simspal$pheno[1,], xlim=c(0,241), ylim=c(-90,0), ylab="angle", xlab="x")
par(new=T)
# plot the recovered data using PC
plot( -903.74*cfpc$eigf[1] + 222 *cfpc$eigf[2] -107*cfpc$eigf[3] -120 * cfpc$eigf[4], xlim=c(0,241), ylim=c(-90,0), col = "red", ylab="", xlab="")
## [1] "done"
The example shows how we can recover the 1st individual using PCs.
outhk <- scanoneM(simspal, Y = Y, method="hk")
outsl <- scanoneM(simspal, Y = Y, method="sl")
outml <- scanoneM(simspal, Y = Y, method="ml")
We proposed HKLOD, SL and ML scores, this statistic can be obtained using scanoneM
function.
par(mfrow=c(3,1))
plot(outhk, ylim=c(0,8), main="The HKLOD curve for simspal data",
bandcol="gray90")
abline(h=4.32, col="red", lty=3)
plot(outsl, ylim=c(0,2), main="The SL curve for simspal data",
bandcol="gray90")
abline(h=1.05, col="red", lty=3)
plot(outml, ylim=c(0,4), main="The ML curve for simspal data",
bandcol="gray90")
abline(h=3.05, col="red", lty=3)
# permutation threshold
# permouthk <- scanoneF(simspal, Y=Y, method = "hk", n.perm=1000)
# permoutf <- scanoneF(simspal, Y=Y, method = "f", n.perm=1000)
# permoutsl <- scanoneF(simspal, Y=Y, method = "sl", n.perm=1000)
# permoutml <- scanoneF(simspal, Y=Y, method = "ml", n.perm=1000)
# summary(permouthk) # display 5, 10 % threshold
# summary(permoutf) # display 5, 10 % threshold
# summary(permoutsl) # display 5, 10 % threshold
# summary(permoutml) # display 5, 10 % threshold
The results are in Figure above. Horizontal lines indicate the 5% genome-wide significance thresholds, derived by a permutation test. We didn’t run the code (for permutation test) above since it takes a long time (about one hour maybe) .
qtlhk <- stepwiseqtlM(simspal, Y = Y, max.qtl=6 , method = "hk", penalties = c(4.44, 10, 18), additive.only = T )
## -Initial scan
## ** new best ** (pLOD increased by 2.555)
## no.qtl = 1 pLOD = 2.554964 formula: y ~ Q1
## -Step 1
## ---Scanning for additive qtl
## plod = 3.208052
## ---Refining positions
## no.qtl = 2 pLOD = 3.208052 formula: y ~ Q1 + Q2
## ** new best ** (pLOD increased by 0.6531)
## -Step 2
## ---Scanning for additive qtl
## plod = 1.468638
## ---Refining positions
## no.qtl = 3 pLOD = 1.468638 formula: y ~ Q1 + Q2 + Q3
## -Step 3
## ---Scanning for additive qtl
## plod = -0.4206258
## ---Refining positions
## no.qtl = 4 pLOD = -0.4206258 formula: y ~ Q1 + Q2 + Q3 + Q4
## -Step 4
## ---Scanning for additive qtl
## plod = -2.455176
## ---Refining positions
## no.qtl = 5 pLOD = -2.455176 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## -Step 5
## ---Scanning for additive qtl
## plod = -3.744527
## ---Refining positions
## --- Moved a bit
## no.qtl = 6 pLOD = -3.38537 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6
## -Starting backward deletion
## ---Dropping Q3
## no.qtl = 5 pLOD = -1.918663 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## ---Refining positions
## ---Dropping Q3
## no.qtl = 4 pLOD = -0.4675776 formula: y ~ Q1 + Q2 + Q3 + Q4
## ---Refining positions
## --- Moved a bit
## ---Dropping Q3
## no.qtl = 3 pLOD = 0.901747 formula: y ~ Q1 + Q2 + Q3
## ---Refining positions
## --- Moved a bit
## ---Dropping Q3
## no.qtl = 2 pLOD = 3.208052 formula: y ~ Q1 + Q2
## ---Refining positions
## ---Dropping Q2
## no.qtl = 1 pLOD = 2.554964 formula: y ~ Q1
## ---Refining positions
qtlml <- stepwiseqtlM(simspal, Y = Y, max.qtl=8 , method = "ml", penalties = c(2.05, 8, 8), additive.only = T )
## -Initial scan
## ** new best ** (pLOD increased by 1.8785)
## no.qtl = 1 pLOD = 1.878513 formula: y ~ Q1
## -Step 1
## ---Scanning for additive qtl
## plod = 3.145657
## ---Refining positions
## no.qtl = 2 pLOD = 3.145657 formula: y ~ Q1 + Q2
## ** new best ** (pLOD increased by 1.2671)
## -Step 2
## ---Scanning for additive qtl
## plod = 3.081701
## ---Refining positions
## no.qtl = 3 pLOD = 3.081701 formula: y ~ Q1 + Q2 + Q3
## -Step 3
## ---Scanning for additive qtl
## plod = 2.798956
## ---Refining positions
## no.qtl = 4 pLOD = 2.798956 formula: y ~ Q1 + Q2 + Q3 + Q4
## -Step 4
## ---Scanning for additive qtl
## plod = 0.9685021
## ---Refining positions
## --- Moved a bit
## no.qtl = 5 pLOD = 1.611057 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## -Step 5
## ---Scanning for additive qtl
## plod = -0.1220567
## ---Refining positions
## --- Moved a bit
## no.qtl = 6 pLOD = 1.064444 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6
## -Step 6
## ---Scanning for additive qtl
## plod = -0.977073
## ---Refining positions
## --- Moved a bit
## no.qtl = 7 pLOD = 0.1129066 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7
## -Step 7
## ---Scanning for additive qtl
## plod = -1.896001
## ---Refining positions
## --- Moved a bit
## no.qtl = 8 pLOD = -1.525032 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7 + Q8
## -Starting backward deletion
## ---Dropping Q5
## no.qtl = 7 pLOD = -0.05376696 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7
## ---Refining positions
## ---Dropping Q6
## no.qtl = 6 pLOD = 1.025062 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6
## ---Refining positions
## --- Moved a bit
## ---Dropping Q6
## no.qtl = 5 pLOD = 2.635989 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## ---Refining positions
## ---Dropping Q5
## no.qtl = 4 pLOD = 3.067093 formula: y ~ Q1 + Q2 + Q3 + Q4
## ---Refining positions
## ---Dropping Q4
## no.qtl = 3 pLOD = 2.47086 formula: y ~ Q1 + Q2 + Q3
## ---Refining positions
## --- Moved a bit
## ---Dropping Q3
## no.qtl = 2 pLOD = 3.145657 formula: y ~ Q1 + Q2
## ---Refining positions
## ---Dropping Q2
## no.qtl = 1 pLOD = 1.878513 formula: y ~ Q1
## ---Refining positions
qtlsl <- stepwiseqtlM(simspal, Y = Y, max.qtl=8 , method = "sl", penalties = c(1.07, 8, 8), additive.only = T )
## -Initial scan
## ** new best ** (pLOD increased by 0.5813)
## no.qtl = 1 pLOD = 0.5812575 formula: y ~ Q1
## -Step 1
## ---Scanning for additive qtl
## plod = 0.8016048
## ---Refining positions
## no.qtl = 2 pLOD = 0.8016048 formula: y ~ Q1 + Q2
## ** new best ** (pLOD increased by 0.2203)
## -Step 2
## ---Scanning for additive qtl
## plod = 0.3915321
## ---Refining positions
## no.qtl = 3 pLOD = 0.3915321 formula: y ~ Q1 + Q2 + Q3
## -Step 3
## ---Scanning for additive qtl
## plod = -0.07309657
## ---Refining positions
## no.qtl = 4 pLOD = -0.07309657 formula: y ~ Q1 + Q2 + Q3 + Q4
## -Step 4
## ---Scanning for additive qtl
## plod = -0.5704659
## ---Refining positions
## no.qtl = 5 pLOD = -0.5704659 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## -Step 5
## ---Scanning for additive qtl
## plod = -0.8452813
## ---Refining positions
## --- Moved a bit
## no.qtl = 6 pLOD = -0.7610157 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6
## -Step 6
## ---Scanning for additive qtl
## plod = -1.163812
## ---Refining positions
## no.qtl = 7 pLOD = -1.163812 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7
## -Step 7
## ---Scanning for additive qtl
## plod = -1.707696
## ---Refining positions
## --- Moved a bit
## no.qtl = 8 pLOD = -1.7073 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7 + Q8
## -Starting backward deletion
## ---Dropping Q8
## no.qtl = 7 pLOD = -1.16386 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6 + Q7
## ---Refining positions
## --- Moved a bit
## ---Dropping Q7
## no.qtl = 6 pLOD = -0.7610157 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5 + Q6
## ---Refining positions
## ---Dropping Q3
## no.qtl = 5 pLOD = -0.3889937 formula: y ~ Q1 + Q2 + Q3 + Q4 + Q5
## ---Refining positions
## ---Dropping Q3
## no.qtl = 4 pLOD = -0.02225778 formula: y ~ Q1 + Q2 + Q3 + Q4
## ---Refining positions
## ---Dropping Q3
## no.qtl = 3 pLOD = 0.2371079 formula: y ~ Q1 + Q2 + Q3
## ---Refining positions
## --- Moved a bit
## ---Dropping Q3
## no.qtl = 2 pLOD = 0.8016048 formula: y ~ Q1 + Q2
## ---Refining positions
## ---Dropping Q2
## no.qtl = 1 pLOD = 0.5812575 formula: y ~ Q1
## ---Refining positions
qtlhk
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 1@62.3 1 62.324 2
## Q2 4@40.3 4 40.290 2
##
## Formula: y ~ Q1 + Q2
##
## pLOD: 3.208
qtlsl
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 1@62.3 1 62.324 2
## Q2 4@40.3 4 40.290 2
##
## Formula: y ~ Q1 + Q2
##
## pLOD: 0.802
qtlml
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 1@57.4 1 57.406 2
## Q2 4@39.9 4 39.943 2
##
## Formula: y ~ Q1 + Q2
##
## pLOD: 3.146
stepwiseqtlM
function returns qtl object by using stepwise QTL selection.
par(mfrow=c(3,1))
rqtlhk <- refineqtlM(cross = simspal, Y = Y, qtl = qtlhk, method = "hk" )
## pos: 62.3236 40.28959
## Iteration 1
## Q1 pos: 62.3236 -> 62.3236
## LOD increase: 0
## Q2 pos: 40.28959 -> 40.28959
## LOD increase: 0
## all pos: 62.3236 40.28959 -> 62.3236 40.28959
## LOD increase at this iteration: 0
## overall pos: 62.3236 40.28959 -> 62.3236 40.28959
## LOD increase overall: 0
rqtlsl <- refineqtlM(cross = simspal, Y = Y, qtl = qtlsl, method = "sl" )
## pos: 62.3236 40.28959
## Iteration 1
## Q1 pos: 62.3236 -> 62.3236
## LOD increase: 0
## Q2 pos: 40.28959 -> 40.28959
## LOD increase: 0
## all pos: 62.3236 40.28959 -> 62.3236 40.28959
## LOD increase at this iteration: 0
## overall pos: 62.3236 40.28959 -> 62.3236 40.28959
## LOD increase overall: 0
rqtlml <- refineqtlM(cross = simspal, Y = Y, qtl = qtlml, method = "ml" )
## pos: 57.40582 39.94289
## Iteration 1
## Q1 pos: 57.40582 -> 57.40582
## LOD increase: 0
## Q2 pos: 39.94289 -> 39.94289
## LOD increase: 0
## all pos: 57.40582 39.94289 -> 57.40582 39.94289
## LOD increase at this iteration: 0
## overall pos: 57.40582 39.94289 -> 57.40582 39.94289
## LOD increase overall: 0
plotLodProfile(rqtlhk, ylab = "Profile LOD score", main = "HKLOD Profile")
plotLodProfile(rqtlsl, ylab = "Profile LOD score", main = "SL Profile")
plotLodProfile(rqtlml, ylab = "Profile LOD score", main = "ML Profile")
You can also make profile curves by using refineqtlM
and plotLodProfile
function. Here are HKLOD, SL and ML profile curves. All the method found 2-QTL model.
## getting Principal componetes.
Z <- calcfunpca(simspal, n.max=8, criteria = .99, nbasis = 30)
### This can be manualy done as follows ( this step needed to get eigen functions)
Y = t(simspal$pheno)
m = 241 # time dimension
## creating 30 b-spline basis using 4 knots
splinebasis.y <- create.bspline.basis(c(0, m), 30, 4)
time <- 0:(m - 1) + 0.5
## do smoothing
mat <- eval.basis(time, splinebasis.y)
coef.y <- solve(crossprod(mat), crossprod(mat, Y))
## get smoothed Y phenotype. now Y is n number of functionals
yfd = fd(coef.y, splinebasis.y, list("time", "indv", "value"))
## Do functional PCA
y.pcalist3 = pca.fd(yfd, 20)
## The proportion variance explained from eigen functions
y.pcalist3$varprop
## [1] 8.285929e-01 1.044871e-01 5.499397e-02 1.093040e-02 6.952561e-05
## [6] 6.674249e-05 5.992196e-05 5.752277e-05 5.615101e-05 5.375070e-05
## [11] 4.813015e-05 4.615560e-05 4.420127e-05 4.194585e-05 4.092614e-05
## [16] 3.804884e-05 3.667173e-05 3.500552e-05 3.273298e-05 3.050656e-05
## first 4 eigen values explain more than 99 percent of data variation.
sum(y.pcalist3$varprop[1:4])
## [1] 0.9990044
sum(y.pcalist3$varprop[1:3])
## [1] 0.988074
y.pcalist3 = pca.fd(yfd, 4)
eigfc3 <- y.pcalist3$harmonics
mat3 <- eval.fd(time, eigfc3)
nY3 <- t(solve(crossprod(mat3), crossprod(mat3, Y)))
temp <- simspal
temp$pheno[,1] <- Z$Y[,1]
temp$pheno[,2] <- Z$Y[,2]
temp$pheno[,3] <- Z$Y[,3]
temp$pheno[,4] <- Z$Y[,4]
hksleff <- vector("list", 4)
for(i in 1:4) {
hksleff[[i]] <- summary(fitqtl(temp, phe=i, qtl=rqtlhk, method="hk", get.ests=TRUE, dropone=FALSE))$ests[,1]*c(1,2,2)
}
nam <- names(hksleff[[1]])
hksleff <- matrix(unlist(hksleff), byrow=TRUE, ncol=length(nam))
colnames(hksleff) <- nam
By using these 4 eigen functions, we can recover effect functions. hksleff
have the coefficients for baseline, Q1 and Q2 effect functions. We can draw baseline, Q1 and Q2 effect function as follows.
par(mfrow=c(1,3))
par(las=1)
baselinehk <- hksleff[1,1] * eigfc3[1]
for(j in 2:4)
baselinehk = baselinehk + hksleff[j,1] * eigfc3[j]
time <- (0:240)/30
x = (0:240)
plot(baselinehk, xlim = c(0,250), ylim=c(-110,0), xaxt = "n", ylab="", col="red")
## [1] "done"
axis(1, at = x[c(1,61,121,181,241)] , labels = time[c(1,61,121,181,241)])
mtext("Time (hours)", side=1, line = 2.8, cex = .7)
mtext("Tip angle (degrees)", side=2, line = 2.8, las=3, cex = .7)
q1hk <- hksleff[1,2] * eigfc3[1]
for(j in 2:4)
q1hk = q1hk + hksleff[j,2] * eigfc3[j]
plot(q1hk, xlim = c(0,250), ylim=c(-5,9), xaxt = "n", col="red", ylab="")
## [1] "done"
axis(1, at = x[c(1,61,121,181,241)] , labels = time[c(1,61,121,181,241)])
mtext("Time (hours)", side=1, line = 2.8, cex = .7)
mtext("QTL effect", side=2, line = 2.8, las=3, cex = .7)
abline(h = 0, lty=2)
q2hk <- hksleff[1,3] * eigfc3[1]
for(j in 2:4)
q2hk = q2hk + hksleff[j,3] * eigfc3[j]
plot(q2hk, xlim = c(0,250), ylim=c(-5,9), xaxt = "n", col="red", ylab="")
## [1] "done"
axis(1, at = x[c(1,61,121,181,241)] , labels = time[c(1,61,121,181,241)])
mtext("Time (hours)", side=1, line = 2.8, cex = .7)
mtext("QTL effect", side=2, line = 2.8, las=3, cex = .7)
abline(h = 0, lty=2)