suppressMessages(silent <- lapply(
c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr", "kableExtra", "DescTools", "skimr"),
character.only=T))
library, = function (..., useNA = 'always') base::table(..., useNA = useNA) table
## [1] 502527 5172
This code chunk has been modified for display to hide the 5-digit code that came with our data table
=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
bdsource("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
=as_tibble(bd)
bddim(bd)
#read.lines("src/components/load_UKBphenotables.r") #how I made this file
<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)
bd_join4=as_tibble(bd_join4)
bd_join4dim(bd_join4)
## [1] 501560 2584
source("src/components/manyColsToDummy.R")
<-read.csv("src/components/sexHormoneMeds.csv")
hormonemedssource("src/components/cbat.R")
source("src/components/SSRI_Meds.R")
<-read.table("src/components/phenocodes.txt", header=F)
codes<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
QCids<-read.csv("src/components/w48818_20220222.csv", header = FALSE)
withdrawn%>%kbl()%>%
hormonemedsscroll_box(width = "200px", height = "200px")
code | name |
---|---|
1140869270 | “medroxyprogesterone” |
1140910674 | “ethinylnortestosterone” |
1140857656 | “methyltestosterone product” |
1140865136 | “yohimbine/pemoline/methyltestosterone” |
1140868532 | “testosterone product” |
1140857690 | “oestradiol 25mg implant 36 week” |
1140857700 | “oestradiol 1mg/1ml injection” |
1141165318 | “cetrorelix” |
1140917306 | “bicalutamide” |
1140917310 | “casodex 50mg tablet” |
1140917448 | “oestradiol 1.25g/dose gel” |
1140917450 | “oestrogel 1.25g gel” |
1141171536 | “ganirelix” |
1140884638 | “clomiphene” |
1140884634 | “cyproterone” |
1140870186 | “oestrifen 10mg tablet” |
1140921100 | “triptorelin” |
1140868984 | “nafarelin” |
1140868978 | “gestrinone” |
1140884544 | “leuprorelin” |
1140870248 | “buserelin” |
1140870194 | “goserelin” |
1141157394 | “goserelin product” |
1140868882 | “gonadorelin” |
1141182558 | “urofollitropin” |
1140868722 | “cyclofenil” |
1140868580 | “norethisterone” |
1140869328 | “ethinyloestradiol+norethisterone acetate 20mcg/1mg tablet” |
1140869356 | “mestranol+norethisterone 50micrograms/1mg tablet” |
1141157406 | “norethisterone product” |
1141168324 | “oestradiol+norethisterone acetate 1mg/0.5mg tablet” |
1141181298 | “ethinylestradiol+norethisterone acetate 20mcg/1mg tablet” |
1141181204 | “ethinylestradiol+norgestimate 35mcg/250mcg tablet” |
1141181218 | “ethinylestradiol product” |
1141181220 | “ethinylestradiol” |
1141181240 | “ethinylestradiol+levonorgestrel 30mcg/150mcg tablet” |
1141181286 | “ethinylestradiol+desogestrel 20mcg/150mcg tablet” |
1140869348 | “ethinyloestradiol+norgestimate 35mcg/250mcg tablet” |
1140868520 | “estracombi tts patch” |
1140857628 | “gestone 10mg/1ml injection” |
1140857704 | “ovestin 250micrograms tablet” |
1140857706 | “oestriol 250micrograms tablet” |
1140857708 | “quinestradol” |
1140857710 | “pentovis 250micrograms tablet” |
1140857714 | “quinestrol” |
1140857716 | “estrovis 4mg tablet” |
1140857736 | “virormone 10mg/1ml injection” |
1140857912 | “desogestrel” |
1140857918 | “lynoestrenol” |
1140857920 | “minilyn tablet” |
1140858324 | “medroxyprogest 80mg/ml suspension 100ml” |
1140858332 | “sh-420 10mg tablet” |
1140858338 | “drostanolone propionate” |
1140858340 | “masteril 100mg/1ml injection” |
1140858348 | “noltam 10mg tablet” |
1140864196 | “climagest 1mg tablet” |
1140864502 | “testotop tts 15mg transdermal patch” |
1140868400 | “oestriol product” |
1140868372 | “climaval 1mg tablet” |
1140868406 | “conjugated oestrogens” |
1140868408 | “premarin 625micrograms tablet” |
1140868420 | “piperazine oestrone sulphate” |
1140868422 | “harmogen 1.5mg tablet” |
1140868472 | “vagifem 25mcg pessary” |
1140868482 | “tibolone” |
1140868488 | “allyloestrenol” |
1140868490 | “gestanin 5mg tablet” |
1140868494 | “dydrogesterone” |
1140868496 | “duphaston 10mg tablet” |
1140868508 | “cyclo-progynova 1mg tablet” |
1140868512 | “syntex menophase tablet” |
1140868514 | “trisequens tablet” |
1140868518 | “nuvelle tablet” |
1140868524 | “androcur 50mg tablet” |
1140868526 | “mesterolone” |
1140868528 | “pro-viron 25mg tablet” |
1140868534 | “primoteston depot 250mg/1ml oily injection” |
1140868536 | “restandol 40mg capsule” |
1140868538 | “sustanon 100 oily injection” |
1140868584 | “primolut-n 5mg tablet” |
1140868586 | “utovlan 5mg tablet” |
1140868588 | “progesterone product” |
1140868590 | “cyclogest 200mg suppository” |
1140868614 | “deca-durabolin 25mg/1ml oily injection” |
1140868618 | “stanozolol” |
1140868620 | “stromba 5mg tablet” |
1140868628 | “humegon 75iu injection (pdr for recon)+solvent” |
1140869032 | “dienoestrol” |
1140869034 | “ortho-gynest 500micrograms pessary” |
1140869166 | “ethinyloestradiol+desogestrel 20mcg/150mcg tablet” |
1140869170 | “conova 30 tablet” |
1140869172 | “ethinyloestradiol+ethynodiol diacetate 30mcg/2mg tablet” |
1140869174 | “eugynon 30 tablet” |
1140869176 | “logynon tablet” |
1140869180 | “microgynon 30 tablet” |
1140869184 | “ovran 30 tablet” |
1140869186 | “ovranette tablet” |
1140869188 | “schering pc4 tablet” |
1140869190 | “trinordiol tablet” |
1140869248 | “ethinyloestradiol+levonorgestrel 30mcg/150mcg tablet” |
1140869254 | “binovum tablet” |
1140869256 | “brevinor tablet” |
1140869258 | “neocon 1/35 tablet” |
1140869260 | “norimin tablet” |
1140869262 | “ovysmen tablet” |
1140869264 | “synphase tablet” |
1140869266 | “trinovum tablet” |
1140869272 | “neogest tablet” |
1140869276 | “micronor tablet” |
1140869278 | “noriday tablet” |
1140869282 | “noristerat 200mg/1ml oily injection” |
1140869324 | “loestrin 20 tablet” |
1140869332 | “minulet tablet” |
1140869334 | “femodene tablet” |
1140869338 | “tri-minulet tablet” |
1140869340 | “triadene tablet” |
1140869346 | “cilest tablet” |
1140869352 | “norinyl-1 tablet” |
1140869354 | “ortho-novin 1/50 tablet” |
1140869360 | “ethynodiol diacetate” |
1140869362 | “femulen tablet” |
1140869366 | “levonorgestrel” |
1140869368 | “microval tablet” |
1140869370 | “norgeston tablet” |
1140870060 | “polyestradiol phosphate” |
1140870052 | “honvan 100mg tablet” |
1140870062 | “estradurin 40mg injection (pdr for recon)+diluent” |
1140870070 | “stilboestrol” |
1140870076 | “apstil 1mg tablet” |
1140876638 | “cyproterone acetate+ethinyloestradiol” |
1140879554 | “estramustine” |
1140884622 | “oestrogen product” |
1140884624 | “fosfestrol” |
1140884626 | “mestranol” |
1140884688 | “hydroxyprogesterone” |
1140909906 | “estropipate” |
1140910562 | “gestonorone” |
1140921088 | “tridestra tablet” |
1141157404 | “ethinyloestradiol product” |
1141157410 | “levonorgestrel product” |
1141157492 | “mestranol product” |
1141166196 | “etonogestrel” |
1141166354 | “testoderm 6mg/24hours transdermal patch” |
1141166366 | “ethinyloestradiol+gestodene 20micrograms/75micrograms tablet” |
1141167206 | “oestrogel 0.06% gel” |
1141177150 | “adgyn medro 5mg tablet” |
1141177158 | “adgyn estro 2mg tablet” |
1141177226 | “adgyn combi 2mg tablet” |
1141179822 | “ethinylestradiol+drospirenone 30micrograms/3mg tablet” |
1141179820 | “drospirenone” |
1141180988 | “dienestrol” |
1141181594 | “estriol product” |
1141181700 | “estradiol product” |
1141182794 | “desogestrel product” |
1141190580 | “conjugated oestrogens 0.3mg / medroxyprogesterone 1.5mg tab” |
1141192344 | “cyproterone acetate+ethinylestradiol” |
1141192874 | “ethinylestradiol+norelgestromin 600mcg/6mg transdermal patch” |
1141193272 | “testogel 50mg gel 5g sachet” |
1141202030 | “estradot 25micrograms patch” |
1141181818 | “estradiol+norethisterone acetate 1mg/0.5mg tablet” |
%>%kbl()%>%
ssricodesscroll_box(width = "200px", height = "200px")
x |
---|
1140921600 |
1141180212 |
1140879540 |
1140867888 |
1140867878 |
1140867876 |
1140867884 |
1140867888 |
1140867860 |
%>%kbl()%>%
codesscroll_box(width = "200px", height = "200px")
V1 | V2 |
---|---|
30620 | Alanine_aminotransferase |
30600 | Albumin |
30610 | Alkaline_phosphatase |
30630 | Apolipoprotein_A |
30640 | Apolipoprotein_B |
30650 | Aspartate_aminotransferase |
30710 | C-reactive_protein |
30680 | Calcium |
30690 | Cholesterol |
30700 | Creatinine |
30720 | Cystatin_C |
30660 | Direct_bilirubin |
30730 | Gamma_glutamyltransferase |
30740 | Glucose |
30750 | Glycated_haemoglobin_(HbA1c) |
30760 | HDL_cholesterol |
30770 | IGF-1 |
30780 | LDL_direct |
30790 | Lipoprotein_A |
30800 | Oestradiol |
30810 | Phosphate |
30820 | Rheumatoid_factor |
30830 | SHBG |
30850 | Testosterone |
30840 | Total_bilirubin |
30860 | Total_protein |
30870 | Triglycerides |
30880 | Urate |
30670 | Urea |
30890 | Vitamin_D |
<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
pheno189.0.0, f.54.0.0, f.22000.0.0,
f.1558.0.0,
f.paste("f.", codes$V1, ".0.0", sep=""),
22009.0.1:f.22009.0.20)
f.colnames(pheno)<-c("IID", "Age", "Sex",
"Townsend","Assessment_center", "Geno_batch",
"AlcoholFreq",
$V2,
codespaste("PCA", 1:20,sep=""))
<-as_tibble(pheno) pheno
<-bd_join4%>%select(f.eid, f.21001.0.0,
pheno220116.0.0,
f.3166.0.0)
f.
colnames(pheno2)<-c("IID","BMI",
"SmokeStatus",
"blood_draw_time")
<-left_join(pheno, pheno2, by="IID")
new<-as_tibble(new)
newcolnames(new)%>% kbl()%>%
scroll_box(width = "600px", height = "200px")
x |
---|
IID |
Age |
Sex |
Townsend |
Assessment_center |
Geno_batch |
AlcoholFreq |
Alanine_aminotransferase |
Albumin |
Alkaline_phosphatase |
Apolipoprotein_A |
Apolipoprotein_B |
Aspartate_aminotransferase |
C-reactive_protein |
Calcium |
Cholesterol |
Creatinine |
Cystatin_C |
Direct_bilirubin |
Gamma_glutamyltransferase |
Glucose |
Glycated_haemoglobin_(HbA1c) |
HDL_cholesterol |
IGF-1 |
LDL_direct |
Lipoprotein_A |
Oestradiol |
Phosphate |
Rheumatoid_factor |
SHBG |
Testosterone |
Total_bilirubin |
Total_protein |
Triglycerides |
Urate |
Urea |
Vitamin_D |
PCA1 |
PCA2 |
PCA3 |
PCA4 |
PCA5 |
PCA6 |
PCA7 |
PCA8 |
PCA9 |
PCA10 |
PCA11 |
PCA12 |
PCA13 |
PCA14 |
PCA15 |
PCA16 |
PCA17 |
PCA18 |
PCA19 |
PCA20 |
BMI |
SmokeStatus |
blood_draw_time |
dim(new)
## [1] 502527 60
#Fix hyphenated column names
colnames(new)[colnames(new) %in%
c("C-reactive_protein", "Glycated_haemoglobin_(HbA1c)",
"IGF-1")]=
c("C_reactive_protein","HbA1c","IGF_1")
nrow(withdrawn)
## [1] 114
<-new[!(new$IID %in% withdrawn$V1), ] new
nrow(QCids)
## [1] 356980
<-new[(new$IID %in% QCids$IID),] new
=as_tibble(read.csv("../VegMarkdown/DefineVeg-09062022.csv"))
Veg5yr
=Veg5yr%>%select(IID,Veg5yr1)
Vegadd<-left_join(new, Vegadd) new
## Joining, by = "IID"
=new%>%filter(!is.na(Veg5yr1))
newtable(new$Veg5yr1)
##
## 0 1 <NA>
## 153047 2328 0
# 0 1
#153047 2328
Age2
$Age2<-new$Age^2 new
Smoking: Turn 20116 SmokeStatus into two new dummy variables
$PreviousSmoker=0
new$PreviousSmoker[new$SmokeStatus==1]=1
new$CurrentSmoker=0
new$CurrentSmoker[new$SmokeStatus==2]=1 new
Alcohol: a binary measure of current drinking for self-reported current drinkers (three or more times a week against less than three times a week). Ref:https://doi.org/10.1038/s41467-019-12424-x
$AlcoholFreq=mapvalues(new$AlcoholFreq,
newfrom=c("Prefer not to answer", "Never", "Special occasions only",
"One to three times a month", "Once or twice a week",
"Three or four times a week", "Daily or almost daily"),
to=c(NA,0,0,0,0,1,1))
$AlcoholFreq=as.numeric(as.character(new$AlcoholFreq))
newtable(new$AlcoholFreq)%>%kbl()
Var1 | Freq |
---|---|
0 | 77958 |
1 | 77327 |
NA | 90 |
I didn’t use assessment center but this code is here for our labmates to use.
table(pheno$Assessment_center)
##
## 10003 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012
## 3793 13939 14058 17878 18647 17195 19433 29409 28321 37002 44198 43012 12582
## 11013 11014 11016 11017 11018 11020 11021 11022 11023 <NA>
## 33876 30396 32816 21286 28875 27380 25501 2281 649 0
<-unique(new$Assessment_center)
centers<-paste("center", 1:22, sep="")
centercols<-0
new[centercols]
for (i in 1:length(centers)){
$Assessment_center==centers[i],][centercols[i]]<-1
new[new
}
<-new%>%select(-Assessment_center) new
$Geno_batch1<-0
new
$Geno_batch1[new$Geno_batch>0]<-1
new$Geno_batch<-new$Geno_batch1
new<-new%>%select(-Geno_batch1)
new
table(new$Geno_batch)%>%kbl()
Var1 | Freq |
---|---|
0 | 15940 |
1 | 139435 |
NA | 0 |
$Sex<-mapvalues(as.character(new$Sex),
newc("Male", "Female"), c(1,2))
$Sex<-as.numeric(new$Sex) new
$zblood<-str_split(new$blood_draw_time, " ", simplify = TRUE)[,2] #Take just time
new$zblood<-str_split(new$zblood, ":", simplify=TRUE)[,c(1,2)] #take minutes and seconds
new
$zblood[,2]<-as.numeric(new$zblood[,2])/60 #convert minutes to hours
new
$zblood<-as.numeric(new$zblood[,1])+as.numeric(new$zblood[,2]) #add together
new
$zblood<-(new$zblood - mean(new$zblood, na.rm=TRUE)) / sd(new$zblood, na.rm=TRUE) #standardize
new
#This column with the colons ":" messes up many softwares so remove it
=new%>%select(-blood_draw_time) new
$zTownsend=(new$Townsend - mean(new$Townsend, na.rm=TRUE)) /sd(new$Townsend, na.rm=TRUE) new
$zAge=(new$Age - mean(new$Age, na.rm=TRUE)) / sd(new$Age, na.rm=TRUE) new
$logSHBG=log(new$SHBG) new
CKD-EPI creatinine-cystatin equation (2021)
https://www.kidney.org/content/ckd-epi-creatinine-cystatin-equation-2021
#Creatinine default units umol/L
#Formula needs mg/dL
#To convert μmol/l to mg/dL, divide by 88.4.
$Scr<-new$Creatinine/88.42
new
#Cystatin C default units: mg/L
#Formula needs mg/L. No conversion necessary
#eGFR: CKD-EPI Creatinine-Cystatin equation:
$eGFR<-0 #initialize col
new$eGFR[new$Sex=="1"]<-135*
newpmin((new$Scr[new$Sex=="1"])/0.9, 1))^(-0.144) *
(pmax(new$Scr[new$Sex=="1"]/0.9, 1))^(-0.544) *
(pmin((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.323) *
(pmax((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.778) *
(0.9961^(new$Age[new$Sex=="1"])
$eGFR[new$Sex=="2"]<-135*
newpmin((new$Scr[new$Sex=="2"])/0.7, 1))^(-0.219) *
(pmax(new$Scr[new$Sex=="2"]/0.7, 1))^(-0.544) *
(pmin((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.323) *
(pmax((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.778) *
(0.9961^(new$Age[new$Sex=="2"]) * 0.963 #for females
#Winsorize
$eGFR<-Winsorize(new$eGFR, minval = 15, maxval = 200,
newprobs = c(0,1), type=1)
#Calculate bioavailable T
<-cbat(new$Testosterone, SHBG = new$SHBG,
cbatoutputALB = new$Albumin)
$bioavailableTest<-cbatoutput$bioavailableTest
new$freeTest=cbatoutput$freeTest new
<-c(sprintf("f.20003.0.%s", 0:47))
statincols<-c(1141146234,1141192414,1140910632,1140888594,1140864592,
statincodes1141146138,1140861970,1140888648,1141192410,
1141188146,1140861958,1140881748,1141200040)
manyColsToDummy(statincodes, bd_join4[,statincols], "statinoutput")
$statins<-rowSums(statinoutput)
statinoutput$statins[statinoutput$statins>1]<-1
statinoutput
$IID<-bd_join4$f.eid
statinoutput
<-statinoutput%>%select(IID, statins)
statinoutput
<-left_join(new, statinoutput, by="IID") new
Cholesterol, LDL-C, and Apolipoprotein B1
sum(new$statins, na.rm=T)
## [1] 22282
$Cholesterolold=new$Cholesterol
new$LDL_directold=new$LDL_direct
new$Apolipoprotein_Bold=new$Apolipoprotein_B
new
$Cholesterol[which(new$statins==1)]<-
new$Cholesterol[which(new$statins==1)]/0.749
new
$LDL_direct[which(new$statins==1)]<-
new$LDL_direct[which(new$statins==1)]/0.684
new
$Apolipoprotein_B[which(new$statins==1)]<-
new$Apolipoprotein_B[which(new$statins==1)]/0.719 new
<-c("f.eid", sprintf("f.20003.0.%s", 0:47))
medcols
<-hormonemeds$code #sourced from sexHormoneMeds.csv
medcodes
names(medcodes)<-hormonemeds$name
manyColsToDummy(medcodes, bd_join4[,medcols], "medoutput")
colnames(medoutput)<-names(medcodes)
$tookMed<-apply(medoutput, 1, sum, na.rm=TRUE)
medoutput
nrow(medoutput[medoutput$tookMed>0,])
## [1] 12665
$tookMed[medoutput$tookMed>1]<-1
medoutput
$IID<-bd_join4$f.eid
medoutput
<-medoutput%>%select(IID, tookMed)
medoutput2
<-left_join(new, medoutput2, by= "IID") new
sum(is.na((new$Testosterone)))
## [1] 21032
$Testosterone[which(!is.na(new$Testosterone) & new$tookMed==1)]<-NA
newsum(is.na((new$Testosterone)))
## [1] 24118
sum(is.na((new$bioavailableTest)))
## [1] 33053
$bioavailableTest[which(!is.na(new$bioavailableTest) & new$tookMed==1)]<-NA
newsum(is.na((new$bioavailableTest)))
## [1] 35784
manyColsToDummy(ssricodes, bd_join4[,medcols], "ssrioutput")
colnames(ssrioutput)<-names(ssricodes)
$tookSSRI<-apply(ssrioutput, 1, sum, na.rm=TRUE)
ssrioutput
nrow(ssrioutput[ssrioutput$tookSSRI>0,])
## [1] 19309
$tookSSRI[ssrioutput$tookSSRI>1]<-1
ssrioutput
$IID<-bd_join4$f.eid
ssrioutput
<-ssrioutput%>%select(IID, tookSSRI)
ssrioutput2
<-left_join(new, ssrioutput2, by= "IID") new
<- function(x) {
inversenormal # inverse normal if you have missing data
return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}
=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,bioavailableTest, freeTest)
phenotypescolnames(phenotypes)%>%kbl()%>%
scroll_box(width = "600px", height = "200px")
x |
---|
Alanine_aminotransferase |
Albumin |
Alkaline_phosphatase |
Apolipoprotein_A |
Apolipoprotein_B |
Aspartate_aminotransferase |
C_reactive_protein |
Calcium |
Cholesterol |
Creatinine |
Cystatin_C |
Direct_bilirubin |
Gamma_glutamyltransferase |
Glucose |
HbA1c |
HDL_cholesterol |
IGF_1 |
LDL_direct |
Lipoprotein_A |
Oestradiol |
Phosphate |
Rheumatoid_factor |
SHBG |
Testosterone |
Total_bilirubin |
Total_protein |
Triglycerides |
Urate |
Urea |
Vitamin_D |
eGFR |
bioavailableTest |
freeTest |
=as_tibble(sapply(phenotypes, inversenormal))
rintcolnames(rint)=paste(colnames(rint), "rint", sep="_")
$IID<-new$IID
rint
<-left_join(new, rint, by="IID") new
="pheno"
outdirsuppressWarnings(dir.create(outdir))
$FID<-new$IID
new
<-new%>%select(FID, everything())
new
<-new%>%select(FID,IID)
participants
write.table(new,
paste(outdir, "/BioxVeg_pheno_09112022.txt", sep=""),
row.names=FALSE, quote=FALSE)
write.csv(new,
paste(outdir, "/BioxVeg_pheno_09112022.csv", sep=""),
row.names=FALSE, quote=FALSE)
Westerman 2022:for those with self-reported use of a statin medication, each of these biomarkers was divided by an adjustment factor (0.749, 0.684, and 0.719, respectively) that had been empirically estimated by Sinnott-Armstrong and colleagues in the same population.↩︎
<-matrix(NA,nrow(new),length(phenotypes))
resdatcolnames(resdat)<-phenotypes
for (p in 1:length(phenotypes)){
assign(paste("lm", phenotypes[p], sep="_"),
lm(new[[phenotypes[p]]] ~
Age,Sex, zTownsend, #ADD COVARIATES HERE
data=new,
na.action=na.exclude))
<-paste("lm", phenotypes[p], sep="_")
lmname<-get(lmname)
lmobj<-resid(lmobj)
resdat[,p]
}
<-as_tibble(resdat)
resdatcolnames(resdat)<-paste(phenotypes, "res", sep="_")
<-matrix(NA,nrow(new),length(phenotypes))
resdat_invcolnames(resdat_inv)<-paste(phenotypes, "resinv", sep="_")
<- function(x) {
inversenormal # inverse normal if you have missing data
return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}
for (i in 1:length(phenotypes)){
<- inversenormal(resdat[,i])
resdat_inv[,i]
}
<-as_tibble(as.data.frame(resdat_inv))
resdat_inv$IID<-new$IID
resdat_inv
<-left_join(new, resdat_inv, by="IID")
new1
###
### Direct rank-based inverse normal transformation (D-INT)
#### Phenotypes
=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,
phenotypes
bioavailableTest)colnames(phenotypes)