suppressMessages(silent <- lapply(
c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr", "kableExtra", "DescTools", "skimr"),
library, character.only=T))
table = function (..., useNA = 'always') base::table(..., useNA = useNA)## [1] 502527 5172
This code chunk has been modified for display to hide the 5-digit code that came with our data table
bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)#read.lines("src/components/load_UKBphenotables.r") #how I made this file
bd_join4<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)
bd_join4=as_tibble(bd_join4)
dim(bd_join4)## [1] 501560 2584
source("src/components/manyColsToDummy.R")
hormonemeds<-read.csv("src/components/sexHormoneMeds.csv")
source("src/components/cbat.R")
source("src/components/SSRI_Meds.R")
codes<-read.table("src/components/phenocodes.txt", header=F)
QCids<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
withdrawn<-read.csv("src/components/w48818_20220222.csv", header = FALSE)
hormonemeds%>%kbl()%>%
scroll_box(width = "200px", height = "200px")| code | name |
|---|---|
| 1140869270 | “medroxyprogesterone” |
| 1140910674 | “ethinylnortestosterone” |
| 1140857656 | “methyltestosterone product” |
| 1140865136 | “yohimbine/pemoline/methyltestosterone” |
| 1140868532 | “testosterone product” |
| 1140857690 | “oestradiol 25mg implant 36 week” |
| 1140857700 | “oestradiol 1mg/1ml injection” |
| 1141165318 | “cetrorelix” |
| 1140917306 | “bicalutamide” |
| 1140917310 | “casodex 50mg tablet” |
| 1140917448 | “oestradiol 1.25g/dose gel” |
| 1140917450 | “oestrogel 1.25g gel” |
| 1141171536 | “ganirelix” |
| 1140884638 | “clomiphene” |
| 1140884634 | “cyproterone” |
| 1140870186 | “oestrifen 10mg tablet” |
| 1140921100 | “triptorelin” |
| 1140868984 | “nafarelin” |
| 1140868978 | “gestrinone” |
| 1140884544 | “leuprorelin” |
| 1140870248 | “buserelin” |
| 1140870194 | “goserelin” |
| 1141157394 | “goserelin product” |
| 1140868882 | “gonadorelin” |
| 1141182558 | “urofollitropin” |
| 1140868722 | “cyclofenil” |
| 1140868580 | “norethisterone” |
| 1140869328 | “ethinyloestradiol+norethisterone acetate 20mcg/1mg tablet” |
| 1140869356 | “mestranol+norethisterone 50micrograms/1mg tablet” |
| 1141157406 | “norethisterone product” |
| 1141168324 | “oestradiol+norethisterone acetate 1mg/0.5mg tablet” |
| 1141181298 | “ethinylestradiol+norethisterone acetate 20mcg/1mg tablet” |
| 1141181204 | “ethinylestradiol+norgestimate 35mcg/250mcg tablet” |
| 1141181218 | “ethinylestradiol product” |
| 1141181220 | “ethinylestradiol” |
| 1141181240 | “ethinylestradiol+levonorgestrel 30mcg/150mcg tablet” |
| 1141181286 | “ethinylestradiol+desogestrel 20mcg/150mcg tablet” |
| 1140869348 | “ethinyloestradiol+norgestimate 35mcg/250mcg tablet” |
| 1140868520 | “estracombi tts patch” |
| 1140857628 | “gestone 10mg/1ml injection” |
| 1140857704 | “ovestin 250micrograms tablet” |
| 1140857706 | “oestriol 250micrograms tablet” |
| 1140857708 | “quinestradol” |
| 1140857710 | “pentovis 250micrograms tablet” |
| 1140857714 | “quinestrol” |
| 1140857716 | “estrovis 4mg tablet” |
| 1140857736 | “virormone 10mg/1ml injection” |
| 1140857912 | “desogestrel” |
| 1140857918 | “lynoestrenol” |
| 1140857920 | “minilyn tablet” |
| 1140858324 | “medroxyprogest 80mg/ml suspension 100ml” |
| 1140858332 | “sh-420 10mg tablet” |
| 1140858338 | “drostanolone propionate” |
| 1140858340 | “masteril 100mg/1ml injection” |
| 1140858348 | “noltam 10mg tablet” |
| 1140864196 | “climagest 1mg tablet” |
| 1140864502 | “testotop tts 15mg transdermal patch” |
| 1140868400 | “oestriol product” |
| 1140868372 | “climaval 1mg tablet” |
| 1140868406 | “conjugated oestrogens” |
| 1140868408 | “premarin 625micrograms tablet” |
| 1140868420 | “piperazine oestrone sulphate” |
| 1140868422 | “harmogen 1.5mg tablet” |
| 1140868472 | “vagifem 25mcg pessary” |
| 1140868482 | “tibolone” |
| 1140868488 | “allyloestrenol” |
| 1140868490 | “gestanin 5mg tablet” |
| 1140868494 | “dydrogesterone” |
| 1140868496 | “duphaston 10mg tablet” |
| 1140868508 | “cyclo-progynova 1mg tablet” |
| 1140868512 | “syntex menophase tablet” |
| 1140868514 | “trisequens tablet” |
| 1140868518 | “nuvelle tablet” |
| 1140868524 | “androcur 50mg tablet” |
| 1140868526 | “mesterolone” |
| 1140868528 | “pro-viron 25mg tablet” |
| 1140868534 | “primoteston depot 250mg/1ml oily injection” |
| 1140868536 | “restandol 40mg capsule” |
| 1140868538 | “sustanon 100 oily injection” |
| 1140868584 | “primolut-n 5mg tablet” |
| 1140868586 | “utovlan 5mg tablet” |
| 1140868588 | “progesterone product” |
| 1140868590 | “cyclogest 200mg suppository” |
| 1140868614 | “deca-durabolin 25mg/1ml oily injection” |
| 1140868618 | “stanozolol” |
| 1140868620 | “stromba 5mg tablet” |
| 1140868628 | “humegon 75iu injection (pdr for recon)+solvent” |
| 1140869032 | “dienoestrol” |
| 1140869034 | “ortho-gynest 500micrograms pessary” |
| 1140869166 | “ethinyloestradiol+desogestrel 20mcg/150mcg tablet” |
| 1140869170 | “conova 30 tablet” |
| 1140869172 | “ethinyloestradiol+ethynodiol diacetate 30mcg/2mg tablet” |
| 1140869174 | “eugynon 30 tablet” |
| 1140869176 | “logynon tablet” |
| 1140869180 | “microgynon 30 tablet” |
| 1140869184 | “ovran 30 tablet” |
| 1140869186 | “ovranette tablet” |
| 1140869188 | “schering pc4 tablet” |
| 1140869190 | “trinordiol tablet” |
| 1140869248 | “ethinyloestradiol+levonorgestrel 30mcg/150mcg tablet” |
| 1140869254 | “binovum tablet” |
| 1140869256 | “brevinor tablet” |
| 1140869258 | “neocon 1/35 tablet” |
| 1140869260 | “norimin tablet” |
| 1140869262 | “ovysmen tablet” |
| 1140869264 | “synphase tablet” |
| 1140869266 | “trinovum tablet” |
| 1140869272 | “neogest tablet” |
| 1140869276 | “micronor tablet” |
| 1140869278 | “noriday tablet” |
| 1140869282 | “noristerat 200mg/1ml oily injection” |
| 1140869324 | “loestrin 20 tablet” |
| 1140869332 | “minulet tablet” |
| 1140869334 | “femodene tablet” |
| 1140869338 | “tri-minulet tablet” |
| 1140869340 | “triadene tablet” |
| 1140869346 | “cilest tablet” |
| 1140869352 | “norinyl-1 tablet” |
| 1140869354 | “ortho-novin 1/50 tablet” |
| 1140869360 | “ethynodiol diacetate” |
| 1140869362 | “femulen tablet” |
| 1140869366 | “levonorgestrel” |
| 1140869368 | “microval tablet” |
| 1140869370 | “norgeston tablet” |
| 1140870060 | “polyestradiol phosphate” |
| 1140870052 | “honvan 100mg tablet” |
| 1140870062 | “estradurin 40mg injection (pdr for recon)+diluent” |
| 1140870070 | “stilboestrol” |
| 1140870076 | “apstil 1mg tablet” |
| 1140876638 | “cyproterone acetate+ethinyloestradiol” |
| 1140879554 | “estramustine” |
| 1140884622 | “oestrogen product” |
| 1140884624 | “fosfestrol” |
| 1140884626 | “mestranol” |
| 1140884688 | “hydroxyprogesterone” |
| 1140909906 | “estropipate” |
| 1140910562 | “gestonorone” |
| 1140921088 | “tridestra tablet” |
| 1141157404 | “ethinyloestradiol product” |
| 1141157410 | “levonorgestrel product” |
| 1141157492 | “mestranol product” |
| 1141166196 | “etonogestrel” |
| 1141166354 | “testoderm 6mg/24hours transdermal patch” |
| 1141166366 | “ethinyloestradiol+gestodene 20micrograms/75micrograms tablet” |
| 1141167206 | “oestrogel 0.06% gel” |
| 1141177150 | “adgyn medro 5mg tablet” |
| 1141177158 | “adgyn estro 2mg tablet” |
| 1141177226 | “adgyn combi 2mg tablet” |
| 1141179822 | “ethinylestradiol+drospirenone 30micrograms/3mg tablet” |
| 1141179820 | “drospirenone” |
| 1141180988 | “dienestrol” |
| 1141181594 | “estriol product” |
| 1141181700 | “estradiol product” |
| 1141182794 | “desogestrel product” |
| 1141190580 | “conjugated oestrogens 0.3mg / medroxyprogesterone 1.5mg tab” |
| 1141192344 | “cyproterone acetate+ethinylestradiol” |
| 1141192874 | “ethinylestradiol+norelgestromin 600mcg/6mg transdermal patch” |
| 1141193272 | “testogel 50mg gel 5g sachet” |
| 1141202030 | “estradot 25micrograms patch” |
| 1141181818 | “estradiol+norethisterone acetate 1mg/0.5mg tablet” |
ssricodes%>%kbl()%>%
scroll_box(width = "200px", height = "200px")| x |
|---|
| 1140921600 |
| 1141180212 |
| 1140879540 |
| 1140867888 |
| 1140867878 |
| 1140867876 |
| 1140867884 |
| 1140867888 |
| 1140867860 |
codes%>%kbl()%>%
scroll_box(width = "200px", height = "200px")| V1 | V2 |
|---|---|
| 30620 | Alanine_aminotransferase |
| 30600 | Albumin |
| 30610 | Alkaline_phosphatase |
| 30630 | Apolipoprotein_A |
| 30640 | Apolipoprotein_B |
| 30650 | Aspartate_aminotransferase |
| 30710 | C-reactive_protein |
| 30680 | Calcium |
| 30690 | Cholesterol |
| 30700 | Creatinine |
| 30720 | Cystatin_C |
| 30660 | Direct_bilirubin |
| 30730 | Gamma_glutamyltransferase |
| 30740 | Glucose |
| 30750 | Glycated_haemoglobin_(HbA1c) |
| 30760 | HDL_cholesterol |
| 30770 | IGF-1 |
| 30780 | LDL_direct |
| 30790 | Lipoprotein_A |
| 30800 | Oestradiol |
| 30810 | Phosphate |
| 30820 | Rheumatoid_factor |
| 30830 | SHBG |
| 30850 | Testosterone |
| 30840 | Total_bilirubin |
| 30860 | Total_protein |
| 30870 | Triglycerides |
| 30880 | Urate |
| 30670 | Urea |
| 30890 | Vitamin_D |
pheno<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
f.189.0.0, f.54.0.0, f.22000.0.0,
f.1558.0.0,
paste("f.", codes$V1, ".0.0", sep=""),
f.22009.0.1:f.22009.0.20)
colnames(pheno)<-c("IID", "Age", "Sex",
"Townsend","Assessment_center", "Geno_batch",
"AlcoholFreq",
codes$V2,
paste("PCA", 1:20,sep=""))
pheno<-as_tibble(pheno)pheno2<-bd_join4%>%select(f.eid, f.21001.0.0,
f.20116.0.0,
f.3166.0.0)
colnames(pheno2)<-c("IID","BMI",
"SmokeStatus",
"blood_draw_time")
new<-left_join(pheno, pheno2, by="IID")
new<-as_tibble(new)
colnames(new)%>% kbl()%>%
scroll_box(width = "600px", height = "200px")| x |
|---|
| IID |
| Age |
| Sex |
| Townsend |
| Assessment_center |
| Geno_batch |
| AlcoholFreq |
| Alanine_aminotransferase |
| Albumin |
| Alkaline_phosphatase |
| Apolipoprotein_A |
| Apolipoprotein_B |
| Aspartate_aminotransferase |
| C-reactive_protein |
| Calcium |
| Cholesterol |
| Creatinine |
| Cystatin_C |
| Direct_bilirubin |
| Gamma_glutamyltransferase |
| Glucose |
| Glycated_haemoglobin_(HbA1c) |
| HDL_cholesterol |
| IGF-1 |
| LDL_direct |
| Lipoprotein_A |
| Oestradiol |
| Phosphate |
| Rheumatoid_factor |
| SHBG |
| Testosterone |
| Total_bilirubin |
| Total_protein |
| Triglycerides |
| Urate |
| Urea |
| Vitamin_D |
| PCA1 |
| PCA2 |
| PCA3 |
| PCA4 |
| PCA5 |
| PCA6 |
| PCA7 |
| PCA8 |
| PCA9 |
| PCA10 |
| PCA11 |
| PCA12 |
| PCA13 |
| PCA14 |
| PCA15 |
| PCA16 |
| PCA17 |
| PCA18 |
| PCA19 |
| PCA20 |
| BMI |
| SmokeStatus |
| blood_draw_time |
dim(new)## [1] 502527 60
#Fix hyphenated column names
colnames(new)[colnames(new) %in%
c("C-reactive_protein", "Glycated_haemoglobin_(HbA1c)",
"IGF-1")]=
c("C_reactive_protein","HbA1c","IGF_1")nrow(withdrawn)## [1] 114
new<-new[!(new$IID %in% withdrawn$V1), ]nrow(QCids)## [1] 356980
new<-new[(new$IID %in% QCids$IID),]Veg5yr=as_tibble(read.csv("../VegMarkdown/DefineVeg-09062022.csv"))
Vegadd=Veg5yr%>%select(IID,Veg5yr1)
new<-left_join(new, Vegadd) ## Joining, by = "IID"
new=new%>%filter(!is.na(Veg5yr1))
table(new$Veg5yr1)##
## 0 1 <NA>
## 153047 2328 0
# 0 1
#153047 2328Age2
new$Age2<-new$Age^2Smoking: Turn 20116 SmokeStatus into two new dummy variables
new$PreviousSmoker=0
new$PreviousSmoker[new$SmokeStatus==1]=1
new$CurrentSmoker=0
new$CurrentSmoker[new$SmokeStatus==2]=1Alcohol: a binary measure of current drinking for self-reported current drinkers (three or more times a week against less than three times a week). Ref:https://doi.org/10.1038/s41467-019-12424-x
new$AlcoholFreq=mapvalues(new$AlcoholFreq,
from=c("Prefer not to answer", "Never", "Special occasions only",
"One to three times a month", "Once or twice a week",
"Three or four times a week", "Daily or almost daily"),
to=c(NA,0,0,0,0,1,1))
new$AlcoholFreq=as.numeric(as.character(new$AlcoholFreq))
table(new$AlcoholFreq)%>%kbl()| Var1 | Freq |
|---|---|
| 0 | 77958 |
| 1 | 77327 |
| NA | 90 |
I didn’t use assessment center but this code is here for our labmates to use.
table(pheno$Assessment_center)##
## 10003 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012
## 3793 13939 14058 17878 18647 17195 19433 29409 28321 37002 44198 43012 12582
## 11013 11014 11016 11017 11018 11020 11021 11022 11023 <NA>
## 33876 30396 32816 21286 28875 27380 25501 2281 649 0
centers<-unique(new$Assessment_center)
centercols<-paste("center", 1:22, sep="")
new[centercols]<-0
for (i in 1:length(centers)){
new[new$Assessment_center==centers[i],][centercols[i]]<-1
}
new<-new%>%select(-Assessment_center)new$Geno_batch1<-0
new$Geno_batch1[new$Geno_batch>0]<-1
new$Geno_batch<-new$Geno_batch1
new<-new%>%select(-Geno_batch1)
table(new$Geno_batch)%>%kbl() | Var1 | Freq |
|---|---|
| 0 | 15940 |
| 1 | 139435 |
| NA | 0 |
new$Sex<-mapvalues(as.character(new$Sex),
c("Male", "Female"), c(1,2))
new$Sex<-as.numeric(new$Sex)new$zblood<-str_split(new$blood_draw_time, " ", simplify = TRUE)[,2] #Take just time
new$zblood<-str_split(new$zblood, ":", simplify=TRUE)[,c(1,2)] #take minutes and seconds
new$zblood[,2]<-as.numeric(new$zblood[,2])/60 #convert minutes to hours
new$zblood<-as.numeric(new$zblood[,1])+as.numeric(new$zblood[,2]) #add together
new$zblood<-(new$zblood - mean(new$zblood, na.rm=TRUE)) / sd(new$zblood, na.rm=TRUE) #standardize
#This column with the colons ":" messes up many softwares so remove it
new=new%>%select(-blood_draw_time)new$zTownsend=(new$Townsend - mean(new$Townsend, na.rm=TRUE)) /sd(new$Townsend, na.rm=TRUE)new$zAge=(new$Age - mean(new$Age, na.rm=TRUE)) / sd(new$Age, na.rm=TRUE)new$logSHBG=log(new$SHBG)CKD-EPI creatinine-cystatin equation (2021)
https://www.kidney.org/content/ckd-epi-creatinine-cystatin-equation-2021
#Creatinine default units umol/L
#Formula needs mg/dL
#To convert μmol/l to mg/dL, divide by 88.4.
new$Scr<-new$Creatinine/88.42
#Cystatin C default units: mg/L
#Formula needs mg/L. No conversion necessary
#eGFR: CKD-EPI Creatinine-Cystatin equation:
new$eGFR<-0 #initialize col
new$eGFR[new$Sex=="1"]<-135*
(pmin((new$Scr[new$Sex=="1"])/0.9, 1))^(-0.144) *
(pmax(new$Scr[new$Sex=="1"]/0.9, 1))^(-0.544) *
(pmin((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.323) *
(pmax((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.778) *
0.9961^(new$Age[new$Sex=="1"])
new$eGFR[new$Sex=="2"]<-135*
(pmin((new$Scr[new$Sex=="2"])/0.7, 1))^(-0.219) *
(pmax(new$Scr[new$Sex=="2"]/0.7, 1))^(-0.544) *
(pmin((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.323) *
(pmax((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.778) *
0.9961^(new$Age[new$Sex=="2"]) * 0.963 #for females
#Winsorize
new$eGFR<-Winsorize(new$eGFR, minval = 15, maxval = 200,
probs = c(0,1), type=1)#Calculate bioavailable T
cbatoutput<-cbat(new$Testosterone, SHBG = new$SHBG,
ALB = new$Albumin)
new$bioavailableTest<-cbatoutput$bioavailableTest
new$freeTest=cbatoutput$freeTeststatincols<-c(sprintf("f.20003.0.%s", 0:47))
statincodes<-c(1141146234,1141192414,1140910632,1140888594,1140864592,
1141146138,1140861970,1140888648,1141192410,
1141188146,1140861958,1140881748,1141200040)
manyColsToDummy(statincodes, bd_join4[,statincols], "statinoutput")
statinoutput$statins<-rowSums(statinoutput)
statinoutput$statins[statinoutput$statins>1]<-1
statinoutput$IID<-bd_join4$f.eid
statinoutput<-statinoutput%>%select(IID, statins)
new<-left_join(new, statinoutput, by="IID")Cholesterol, LDL-C, and Apolipoprotein B1
sum(new$statins, na.rm=T)## [1] 22282
new$Cholesterolold=new$Cholesterol
new$LDL_directold=new$LDL_direct
new$Apolipoprotein_Bold=new$Apolipoprotein_B
new$Cholesterol[which(new$statins==1)]<-
new$Cholesterol[which(new$statins==1)]/0.749
new$LDL_direct[which(new$statins==1)]<-
new$LDL_direct[which(new$statins==1)]/0.684
new$Apolipoprotein_B[which(new$statins==1)]<-
new$Apolipoprotein_B[which(new$statins==1)]/0.719medcols<-c("f.eid", sprintf("f.20003.0.%s", 0:47))
medcodes<-hormonemeds$code #sourced from sexHormoneMeds.csv
names(medcodes)<-hormonemeds$name
manyColsToDummy(medcodes, bd_join4[,medcols], "medoutput")
colnames(medoutput)<-names(medcodes)
medoutput$tookMed<-apply(medoutput, 1, sum, na.rm=TRUE)
nrow(medoutput[medoutput$tookMed>0,])## [1] 12665
medoutput$tookMed[medoutput$tookMed>1]<-1
medoutput$IID<-bd_join4$f.eid
medoutput2<-medoutput%>%select(IID, tookMed)
new<-left_join(new, medoutput2, by= "IID")sum(is.na((new$Testosterone)))## [1] 21032
new$Testosterone[which(!is.na(new$Testosterone) & new$tookMed==1)]<-NA
sum(is.na((new$Testosterone)))## [1] 24118
sum(is.na((new$bioavailableTest)))## [1] 33053
new$bioavailableTest[which(!is.na(new$bioavailableTest) & new$tookMed==1)]<-NA
sum(is.na((new$bioavailableTest)))## [1] 35784
manyColsToDummy(ssricodes, bd_join4[,medcols], "ssrioutput")
colnames(ssrioutput)<-names(ssricodes)
ssrioutput$tookSSRI<-apply(ssrioutput, 1, sum, na.rm=TRUE)
nrow(ssrioutput[ssrioutput$tookSSRI>0,])## [1] 19309
ssrioutput$tookSSRI[ssrioutput$tookSSRI>1]<-1
ssrioutput$IID<-bd_join4$f.eid
ssrioutput2<-ssrioutput%>%select(IID, tookSSRI)
new<-left_join(new, ssrioutput2, by= "IID")inversenormal <- function(x) {
# inverse normal if you have missing data
return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,bioavailableTest, freeTest)
colnames(phenotypes)%>%kbl()%>%
scroll_box(width = "600px", height = "200px")| x |
|---|
| Alanine_aminotransferase |
| Albumin |
| Alkaline_phosphatase |
| Apolipoprotein_A |
| Apolipoprotein_B |
| Aspartate_aminotransferase |
| C_reactive_protein |
| Calcium |
| Cholesterol |
| Creatinine |
| Cystatin_C |
| Direct_bilirubin |
| Gamma_glutamyltransferase |
| Glucose |
| HbA1c |
| HDL_cholesterol |
| IGF_1 |
| LDL_direct |
| Lipoprotein_A |
| Oestradiol |
| Phosphate |
| Rheumatoid_factor |
| SHBG |
| Testosterone |
| Total_bilirubin |
| Total_protein |
| Triglycerides |
| Urate |
| Urea |
| Vitamin_D |
| eGFR |
| bioavailableTest |
| freeTest |
rint=as_tibble(sapply(phenotypes, inversenormal))
colnames(rint)=paste(colnames(rint), "rint", sep="_")
rint$IID<-new$IID
new<-left_join(new, rint, by="IID")outdir="pheno"
suppressWarnings(dir.create(outdir))
new$FID<-new$IID
new<-new%>%select(FID, everything())
participants<-new%>%select(FID,IID)
write.table(new,
paste(outdir, "/BioxVeg_pheno_09112022.txt", sep=""),
row.names=FALSE, quote=FALSE)
write.csv(new,
paste(outdir, "/BioxVeg_pheno_09112022.csv", sep=""),
row.names=FALSE, quote=FALSE)Westerman 2022:for those with self-reported use of a statin medication, each of these biomarkers was divided by an adjustment factor (0.749, 0.684, and 0.719, respectively) that had been empirically estimated by Sinnott-Armstrong and colleagues in the same population.↩︎
resdat<-matrix(NA,nrow(new),length(phenotypes))
colnames(resdat)<-phenotypes
for (p in 1:length(phenotypes)){
assign(paste("lm", phenotypes[p], sep="_"),
lm(new[[phenotypes[p]]] ~
Age,Sex, zTownsend,
#ADD COVARIATES HERE
data=new,
na.action=na.exclude))
lmname<-paste("lm", phenotypes[p], sep="_")
lmobj<-get(lmname)
resdat[,p]<-resid(lmobj)
}
resdat<-as_tibble(resdat)
colnames(resdat)<-paste(phenotypes, "res", sep="_")
resdat_inv<-matrix(NA,nrow(new),length(phenotypes))
colnames(resdat_inv)<-paste(phenotypes, "resinv", sep="_")
inversenormal <- function(x) {
# inverse normal if you have missing data
return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}
for (i in 1:length(phenotypes)){
resdat_inv[,i] <- inversenormal(resdat[,i])
}
resdat_inv<-as_tibble(as.data.frame(resdat_inv))
resdat_inv$IID<-new$IID
new1<-left_join(new, resdat_inv, by="IID")
###
### Direct rank-based inverse normal transformation (D-INT)
#### Phenotypes
phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,
bioavailableTest)
colnames(phenotypes)