Prepare the phenotype table for biomarker traits analyzed in this study.

Load packages + make it so table always displays NA’s

suppressMessages(silent <- lapply(
    c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr", "kableExtra", "DescTools", "skimr"), 
    library, character.only=T))
table = function (..., useNA = 'always') base::table(..., useNA = useNA)

Load UK Biobank dataset

## [1] 502527   5172

This code chunk has been modified for display to hide the 5-digit code that came with our data table

bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)

Load in second UK Biobank dataset

#read.lines("src/components/load_UKBphenotables.r") #how I made this file

bd_join4<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)
bd_join4=as_tibble(bd_join4)
dim(bd_join4)
## [1] 501560   2584

Read in extra files

source("src/components/manyColsToDummy.R")
hormonemeds<-read.csv("src/components/sexHormoneMeds.csv")
source("src/components/cbat.R")
source("src/components/SSRI_Meds.R")
codes<-read.table("src/components/phenocodes.txt", header=F)
QCids<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
withdrawn<-read.csv("src/components/w48818_20220222.csv", header = FALSE)
hormonemeds%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")
code name
1140869270 “medroxyprogesterone”
1140910674 “ethinylnortestosterone”
1140857656 “methyltestosterone product”
1140865136 “yohimbine/pemoline/methyltestosterone”
1140868532 “testosterone product”
1140857690 “oestradiol 25mg implant 36 week”
1140857700 “oestradiol 1mg/1ml injection”
1141165318 “cetrorelix”
1140917306 “bicalutamide”
1140917310 “casodex 50mg tablet”
1140917448 “oestradiol 1.25g/dose gel”
1140917450 “oestrogel 1.25g gel”
1141171536 “ganirelix”
1140884638 “clomiphene”
1140884634 “cyproterone”
1140870186 “oestrifen 10mg tablet”
1140921100 “triptorelin”
1140868984 “nafarelin”
1140868978 “gestrinone”
1140884544 “leuprorelin”
1140870248 “buserelin”
1140870194 “goserelin”
1141157394 “goserelin product”
1140868882 “gonadorelin”
1141182558 “urofollitropin”
1140868722 “cyclofenil”
1140868580 “norethisterone”
1140869328 “ethinyloestradiol+norethisterone acetate 20mcg/1mg tablet”
1140869356 “mestranol+norethisterone 50micrograms/1mg tablet”
1141157406 “norethisterone product”
1141168324 “oestradiol+norethisterone acetate 1mg/0.5mg tablet”
1141181298 “ethinylestradiol+norethisterone acetate 20mcg/1mg tablet”
1141181204 “ethinylestradiol+norgestimate 35mcg/250mcg tablet”
1141181218 “ethinylestradiol product”
1141181220 “ethinylestradiol”
1141181240 “ethinylestradiol+levonorgestrel 30mcg/150mcg tablet”
1141181286 “ethinylestradiol+desogestrel 20mcg/150mcg tablet”
1140869348 “ethinyloestradiol+norgestimate 35mcg/250mcg tablet”
1140868520 “estracombi tts patch”
1140857628 “gestone 10mg/1ml injection”
1140857704 “ovestin 250micrograms tablet”
1140857706 “oestriol 250micrograms tablet”
1140857708 “quinestradol”
1140857710 “pentovis 250micrograms tablet”
1140857714 “quinestrol”
1140857716 “estrovis 4mg tablet”
1140857736 “virormone 10mg/1ml injection”
1140857912 “desogestrel”
1140857918 “lynoestrenol”
1140857920 “minilyn tablet”
1140858324 “medroxyprogest 80mg/ml suspension 100ml”
1140858332 “sh-420 10mg tablet”
1140858338 “drostanolone propionate”
1140858340 “masteril 100mg/1ml injection”
1140858348 “noltam 10mg tablet”
1140864196 “climagest 1mg tablet”
1140864502 “testotop tts 15mg transdermal patch”
1140868400 “oestriol product”
1140868372 “climaval 1mg tablet”
1140868406 “conjugated oestrogens”
1140868408 “premarin 625micrograms tablet”
1140868420 “piperazine oestrone sulphate”
1140868422 “harmogen 1.5mg tablet”
1140868472 “vagifem 25mcg pessary”
1140868482 “tibolone”
1140868488 “allyloestrenol”
1140868490 “gestanin 5mg tablet”
1140868494 “dydrogesterone”
1140868496 “duphaston 10mg tablet”
1140868508 “cyclo-progynova 1mg tablet”
1140868512 “syntex menophase tablet”
1140868514 “trisequens tablet”
1140868518 “nuvelle tablet”
1140868524 “androcur 50mg tablet”
1140868526 “mesterolone”
1140868528 “pro-viron 25mg tablet”
1140868534 “primoteston depot 250mg/1ml oily injection”
1140868536 “restandol 40mg capsule”
1140868538 “sustanon 100 oily injection”
1140868584 “primolut-n 5mg tablet”
1140868586 “utovlan 5mg tablet”
1140868588 “progesterone product”
1140868590 “cyclogest 200mg suppository”
1140868614 “deca-durabolin 25mg/1ml oily injection”
1140868618 “stanozolol”
1140868620 “stromba 5mg tablet”
1140868628 “humegon 75iu injection (pdr for recon)+solvent”
1140869032 “dienoestrol”
1140869034 “ortho-gynest 500micrograms pessary”
1140869166 “ethinyloestradiol+desogestrel 20mcg/150mcg tablet”
1140869170 “conova 30 tablet”
1140869172 “ethinyloestradiol+ethynodiol diacetate 30mcg/2mg tablet”
1140869174 “eugynon 30 tablet”
1140869176 “logynon tablet”
1140869180 “microgynon 30 tablet”
1140869184 “ovran 30 tablet”
1140869186 “ovranette tablet”
1140869188 “schering pc4 tablet”
1140869190 “trinordiol tablet”
1140869248 “ethinyloestradiol+levonorgestrel 30mcg/150mcg tablet”
1140869254 “binovum tablet”
1140869256 “brevinor tablet”
1140869258 “neocon 1/35 tablet”
1140869260 “norimin tablet”
1140869262 “ovysmen tablet”
1140869264 “synphase tablet”
1140869266 “trinovum tablet”
1140869272 “neogest tablet”
1140869276 “micronor tablet”
1140869278 “noriday tablet”
1140869282 “noristerat 200mg/1ml oily injection”
1140869324 “loestrin 20 tablet”
1140869332 “minulet tablet”
1140869334 “femodene tablet”
1140869338 “tri-minulet tablet”
1140869340 “triadene tablet”
1140869346 “cilest tablet”
1140869352 “norinyl-1 tablet”
1140869354 “ortho-novin 1/50 tablet”
1140869360 “ethynodiol diacetate”
1140869362 “femulen tablet”
1140869366 “levonorgestrel”
1140869368 “microval tablet”
1140869370 “norgeston tablet”
1140870060 “polyestradiol phosphate”
1140870052 “honvan 100mg tablet”
1140870062 “estradurin 40mg injection (pdr for recon)+diluent”
1140870070 “stilboestrol”
1140870076 “apstil 1mg tablet”
1140876638 “cyproterone acetate+ethinyloestradiol”
1140879554 “estramustine”
1140884622 “oestrogen product”
1140884624 “fosfestrol”
1140884626 “mestranol”
1140884688 “hydroxyprogesterone”
1140909906 “estropipate”
1140910562 “gestonorone”
1140921088 “tridestra tablet”
1141157404 “ethinyloestradiol product”
1141157410 “levonorgestrel product”
1141157492 “mestranol product”
1141166196 “etonogestrel”
1141166354 “testoderm 6mg/24hours transdermal patch”
1141166366 “ethinyloestradiol+gestodene 20micrograms/75micrograms tablet”
1141167206 “oestrogel 0.06% gel”
1141177150 “adgyn medro 5mg tablet”
1141177158 “adgyn estro 2mg tablet”
1141177226 “adgyn combi 2mg tablet”
1141179822 “ethinylestradiol+drospirenone 30micrograms/3mg tablet”
1141179820 “drospirenone”
1141180988 “dienestrol”
1141181594 “estriol product”
1141181700 “estradiol product”
1141182794 “desogestrel product”
1141190580 “conjugated oestrogens 0.3mg / medroxyprogesterone 1.5mg tab”
1141192344 “cyproterone acetate+ethinylestradiol”
1141192874 “ethinylestradiol+norelgestromin 600mcg/6mg transdermal patch”
1141193272 “testogel 50mg gel 5g sachet”
1141202030 “estradot 25micrograms patch”
1141181818 “estradiol+norethisterone acetate 1mg/0.5mg tablet”
ssricodes%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")
x
1140921600
1141180212
1140879540
1140867888
1140867878
1140867876
1140867884
1140867888
1140867860
codes%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")
V1 V2
30620 Alanine_aminotransferase
30600 Albumin
30610 Alkaline_phosphatase
30630 Apolipoprotein_A
30640 Apolipoprotein_B
30650 Aspartate_aminotransferase
30710 C-reactive_protein
30680 Calcium
30690 Cholesterol
30700 Creatinine
30720 Cystatin_C
30660 Direct_bilirubin
30730 Gamma_glutamyltransferase
30740 Glucose
30750 Glycated_haemoglobin_(HbA1c)
30760 HDL_cholesterol
30770 IGF-1
30780 LDL_direct
30790 Lipoprotein_A
30800 Oestradiol
30810 Phosphate
30820 Rheumatoid_factor
30830 SHBG
30850 Testosterone
30840 Total_bilirubin
30860 Total_protein
30870 Triglycerides
30880 Urate
30670 Urea
30890 Vitamin_D

Extract columns from main bd dataframe

pheno<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
                    f.189.0.0, f.54.0.0, f.22000.0.0,
                    f.1558.0.0,
                    paste("f.", codes$V1, ".0.0", sep=""),
                    f.22009.0.1:f.22009.0.20)
colnames(pheno)<-c("IID", "Age", "Sex",  
                    "Townsend","Assessment_center", "Geno_batch",
                    "AlcoholFreq",
                    codes$V2,
                    paste("PCA", 1:20,sep=""))

pheno<-as_tibble(pheno)

Extract cols from second UKB dataframe and join the two

pheno2<-bd_join4%>%select(f.eid, f.21001.0.0, 
                          f.20116.0.0,
                          f.3166.0.0)

colnames(pheno2)<-c("IID","BMI", 
                    "SmokeStatus",         
                    "blood_draw_time")


new<-left_join(pheno, pheno2, by="IID")
new<-as_tibble(new)
colnames(new)%>% kbl()%>%
    scroll_box(width = "600px", height = "200px")
x
IID
Age
Sex
Townsend
Assessment_center
Geno_batch
AlcoholFreq
Alanine_aminotransferase
Albumin
Alkaline_phosphatase
Apolipoprotein_A
Apolipoprotein_B
Aspartate_aminotransferase
C-reactive_protein
Calcium
Cholesterol
Creatinine
Cystatin_C
Direct_bilirubin
Gamma_glutamyltransferase
Glucose
Glycated_haemoglobin_(HbA1c)
HDL_cholesterol
IGF-1
LDL_direct
Lipoprotein_A
Oestradiol
Phosphate
Rheumatoid_factor
SHBG
Testosterone
Total_bilirubin
Total_protein
Triglycerides
Urate
Urea
Vitamin_D
PCA1
PCA2
PCA3
PCA4
PCA5
PCA6
PCA7
PCA8
PCA9
PCA10
PCA11
PCA12
PCA13
PCA14
PCA15
PCA16
PCA17
PCA18
PCA19
PCA20
BMI
SmokeStatus
blood_draw_time
dim(new)
## [1] 502527     60
#Fix hyphenated column names
colnames(new)[colnames(new) %in% 
                  c("C-reactive_protein", "Glycated_haemoglobin_(HbA1c)", 
                    "IGF-1")]=
    c("C_reactive_protein","HbA1c","IGF_1")

Remove withdrawn participants

nrow(withdrawn)
## [1] 114
new<-new[!(new$IID %in% withdrawn$V1), ]

QC participants via output of UKB_participantQC.R

nrow(QCids)
## [1] 356980
new<-new[(new$IID %in% QCids$IID),]

Add Vegetarian Columns (output from DefineVeg step)

Veg5yr=as_tibble(read.csv("../VegMarkdown/DefineVeg-09062022.csv"))

Vegadd=Veg5yr%>%select(IID,Veg5yr1)
new<-left_join(new, Vegadd) 
## Joining, by = "IID"
new=new%>%filter(!is.na(Veg5yr1))
table(new$Veg5yr1)
## 
##      0      1   <NA> 
## 153047   2328      0
#     0      1 
#153047   2328

Update covariate columns for use in our models

Age2

new$Age2<-new$Age^2

Smoking: Turn 20116 SmokeStatus into two new dummy variables

new$PreviousSmoker=0
new$PreviousSmoker[new$SmokeStatus==1]=1
new$CurrentSmoker=0
new$CurrentSmoker[new$SmokeStatus==2]=1

Alcohol: a binary measure of current drinking for self-reported current drinkers (three or more times a week against less than three times a week). Ref:https://doi.org/10.1038/s41467-019-12424-x

new$AlcoholFreq=mapvalues(new$AlcoholFreq,
                from=c("Prefer not to answer", "Never", "Special occasions only",
                "One to three times a month", "Once or twice a week",
                "Three or four times a week", "Daily or almost daily"),
                to=c(NA,0,0,0,0,1,1))
new$AlcoholFreq=as.numeric(as.character(new$AlcoholFreq))
table(new$AlcoholFreq)%>%kbl()
Var1 Freq
0 77958
1 77327
NA 90

Make dummy 0/1 cols for each assessment center

I didn’t use assessment center but this code is here for our labmates to use.

table(pheno$Assessment_center)
## 
## 10003 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 
##  3793 13939 14058 17878 18647 17195 19433 29409 28321 37002 44198 43012 12582 
## 11013 11014 11016 11017 11018 11020 11021 11022 11023  <NA> 
## 33876 30396 32816 21286 28875 27380 25501  2281   649     0
centers<-unique(new$Assessment_center)
centercols<-paste("center", 1:22, sep="")
new[centercols]<-0

for (i in 1:length(centers)){
    new[new$Assessment_center==centers[i],][centercols[i]]<-1
}

new<-new%>%select(-Assessment_center)

Genotype batch

new$Geno_batch1<-0

new$Geno_batch1[new$Geno_batch>0]<-1
new$Geno_batch<-new$Geno_batch1
new<-new%>%select(-Geno_batch1)

table(new$Geno_batch)%>%kbl() 
Var1 Freq
0 15940
1 139435
NA 0

Switch sex factor to numeric

new$Sex<-mapvalues(as.character(new$Sex), 
                     c("Male", "Female"), c(1,2))
new$Sex<-as.numeric(new$Sex)

Z-transformation of blood draw times

new$zblood<-str_split(new$blood_draw_time, " ", simplify = TRUE)[,2] #Take just time
new$zblood<-str_split(new$zblood, ":", simplify=TRUE)[,c(1,2)] #take minutes and seconds

new$zblood[,2]<-as.numeric(new$zblood[,2])/60 #convert minutes to hours

new$zblood<-as.numeric(new$zblood[,1])+as.numeric(new$zblood[,2]) #add together

new$zblood<-(new$zblood - mean(new$zblood, na.rm=TRUE)) / sd(new$zblood, na.rm=TRUE) #standardize

#This column with the colons ":" messes up many softwares so remove it
new=new%>%select(-blood_draw_time)

Z transform Townsend score

new$zTownsend=(new$Townsend - mean(new$Townsend, na.rm=TRUE)) /sd(new$Townsend, na.rm=TRUE)

Z transform Age

new$zAge=(new$Age - mean(new$Age, na.rm=TRUE)) / sd(new$Age, na.rm=TRUE)

log SHBG

new$logSHBG=log(new$SHBG)

Calculate eGFR

CKD-EPI creatinine-cystatin equation (2021)

https://www.kidney.org/content/ckd-epi-creatinine-cystatin-equation-2021

#Creatinine default units umol/L 
#Formula needs mg/dL
#To convert μmol/l to mg/dL, divide by 88.4. 
new$Scr<-new$Creatinine/88.42

#Cystatin C default units: mg/L
#Formula needs mg/L. No conversion necessary
#eGFR: CKD-EPI Creatinine-Cystatin equation: 
new$eGFR<-0 #initialize col
new$eGFR[new$Sex=="1"]<-135*
    (pmin((new$Scr[new$Sex=="1"])/0.9, 1))^(-0.144) *
    (pmax(new$Scr[new$Sex=="1"]/0.9, 1))^(-0.544) * 
    (pmin((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.323) *
    (pmax((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.778) *
    0.9961^(new$Age[new$Sex=="1"])

new$eGFR[new$Sex=="2"]<-135*
    (pmin((new$Scr[new$Sex=="2"])/0.7, 1))^(-0.219) *
    (pmax(new$Scr[new$Sex=="2"]/0.7, 1))^(-0.544) * 
    (pmin((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.323) *
    (pmax((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.778) *
    0.9961^(new$Age[new$Sex=="2"]) * 0.963 #for females
#Winsorize
new$eGFR<-Winsorize(new$eGFR, minval = 15, maxval = 200, 
                    probs = c(0,1), type=1)

Add bioavail and free testosterone

#Calculate bioavailable T
cbatoutput<-cbat(new$Testosterone, SHBG = new$SHBG, 
                 ALB = new$Albumin)
new$bioavailableTest<-cbatoutput$bioavailableTest
new$freeTest=cbatoutput$freeTest

Find statin users

statincols<-c(sprintf("f.20003.0.%s", 0:47))
statincodes<-c(1141146234,1141192414,1140910632,1140888594,1140864592,
    1141146138,1140861970,1140888648,1141192410,
    1141188146,1140861958,1140881748,1141200040)

manyColsToDummy(statincodes, bd_join4[,statincols], "statinoutput")
statinoutput$statins<-rowSums(statinoutput) 
statinoutput$statins[statinoutput$statins>1]<-1

statinoutput$IID<-bd_join4$f.eid

statinoutput<-statinoutput%>%select(IID, statins)

new<-left_join(new, statinoutput, by="IID")

Adjust lipid phenotypes

Cholesterol, LDL-C, and Apolipoprotein B1

sum(new$statins, na.rm=T)
## [1] 22282
new$Cholesterolold=new$Cholesterol
new$LDL_directold=new$LDL_direct
new$Apolipoprotein_Bold=new$Apolipoprotein_B

new$Cholesterol[which(new$statins==1)]<-
    new$Cholesterol[which(new$statins==1)]/0.749

new$LDL_direct[which(new$statins==1)]<-
        new$LDL_direct[which(new$statins==1)]/0.684


new$Apolipoprotein_B[which(new$statins==1)]<-
        new$Apolipoprotein_B[which(new$statins==1)]/0.719

Taking hormone medications

medcols<-c("f.eid", sprintf("f.20003.0.%s", 0:47))

medcodes<-hormonemeds$code #sourced from sexHormoneMeds.csv

names(medcodes)<-hormonemeds$name

manyColsToDummy(medcodes, bd_join4[,medcols], "medoutput")

colnames(medoutput)<-names(medcodes)

medoutput$tookMed<-apply(medoutput, 1, sum, na.rm=TRUE)

nrow(medoutput[medoutput$tookMed>0,])
## [1] 12665
medoutput$tookMed[medoutput$tookMed>1]<-1

medoutput$IID<-bd_join4$f.eid

medoutput2<-medoutput%>%select(IID, tookMed)

new<-left_join(new, medoutput2, by= "IID")

Remove testosterone values for those taking hormone medication

sum(is.na((new$Testosterone)))
## [1] 21032
new$Testosterone[which(!is.na(new$Testosterone) & new$tookMed==1)]<-NA
sum(is.na((new$Testosterone)))
## [1] 24118
sum(is.na((new$bioavailableTest)))
## [1] 33053
new$bioavailableTest[which(!is.na(new$bioavailableTest) & new$tookMed==1)]<-NA
sum(is.na((new$bioavailableTest)))
## [1] 35784

Taking SSRIs

manyColsToDummy(ssricodes, bd_join4[,medcols], "ssrioutput")

colnames(ssrioutput)<-names(ssricodes)

ssrioutput$tookSSRI<-apply(ssrioutput, 1, sum, na.rm=TRUE)

nrow(ssrioutput[ssrioutput$tookSSRI>0,])
## [1] 19309
ssrioutput$tookSSRI[ssrioutput$tookSSRI>1]<-1

ssrioutput$IID<-bd_join4$f.eid

ssrioutput2<-ssrioutput%>%select(IID, tookSSRI)

new<-left_join(new, ssrioutput2, by= "IID")

Indirect rank-based inverse normal transformation (I-INT) (didn’t use in this study)2

Function

inversenormal <- function(x) {
    # inverse normal if you have missing data 
    return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}

Make RINT transformed columns, add back to original table

phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,bioavailableTest, freeTest)
 colnames(phenotypes)%>%kbl()%>% 
    scroll_box(width = "600px", height = "200px")
x
Alanine_aminotransferase
Albumin
Alkaline_phosphatase
Apolipoprotein_A
Apolipoprotein_B
Aspartate_aminotransferase
C_reactive_protein
Calcium
Cholesterol
Creatinine
Cystatin_C
Direct_bilirubin
Gamma_glutamyltransferase
Glucose
HbA1c
HDL_cholesterol
IGF_1
LDL_direct
Lipoprotein_A
Oestradiol
Phosphate
Rheumatoid_factor
SHBG
Testosterone
Total_bilirubin
Total_protein
Triglycerides
Urate
Urea
Vitamin_D
eGFR
bioavailableTest
freeTest
rint=as_tibble(sapply(phenotypes, inversenormal))
colnames(rint)=paste(colnames(rint), "rint", sep="_")
rint$IID<-new$IID

new<-left_join(new, rint, by="IID")

Write output

outdir="pheno"
suppressWarnings(dir.create(outdir))
new$FID<-new$IID

new<-new%>%select(FID, everything())

participants<-new%>%select(FID,IID)

write.table(new,
    paste(outdir, "/BioxVeg_pheno_09112022.txt", sep=""),
    row.names=FALSE, quote=FALSE)

write.csv(new,
        paste(outdir, "/BioxVeg_pheno_09112022.csv", sep=""),
        row.names=FALSE, quote=FALSE)

  1. Westerman 2022:for those with self-reported use of a statin medication, each of these biomarkers was divided by an adjustment factor (0.749, 0.684, and 0.719, respectively) that had been empirically estimated by Sinnott-Armstrong and colleagues in the same population.↩︎

  2. Calculate residuals

    resdat<-matrix(NA,nrow(new),length(phenotypes))
    colnames(resdat)<-phenotypes
    
    for (p in 1:length(phenotypes)){
        assign(paste("lm", phenotypes[p], sep="_"), 
               lm(new[[phenotypes[p]]] ~ 
                    Age,Sex, zTownsend, 
                    #ADD COVARIATES HERE
                  data=new, 
                  na.action=na.exclude))
        lmname<-paste("lm", phenotypes[p], sep="_")
        lmobj<-get(lmname)
        resdat[,p]<-resid(lmobj)
    }
    
    resdat<-as_tibble(resdat)
    colnames(resdat)<-paste(phenotypes, "res", sep="_")
    
    resdat_inv<-matrix(NA,nrow(new),length(phenotypes))
    colnames(resdat_inv)<-paste(phenotypes, "resinv", sep="_")
    
    inversenormal <- function(x) { 
        # inverse normal if you have missing data 
        return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x)))) 
    } 
    
    for (i in 1:length(phenotypes)){
        resdat_inv[,i] <- inversenormal(resdat[,i])
    }
    
    resdat_inv<-as_tibble(as.data.frame(resdat_inv))
    resdat_inv$IID<-new$IID
    
    new1<-left_join(new, resdat_inv, by="IID")
    
    ### 
    
    ### Direct rank-based inverse normal transformation (D-INT)
    
    #### Phenotypes
    
    phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR, 
                        bioavailableTest)
    colnames(phenotypes)
    ↩︎