Prepare the phenotype table for biomarker traits analyzed in this study.

Load packages + make it so table always displays NA’s

suppressMessages(silent <- lapply(
    c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr", "kableExtra", "DescTools", "skimr"), 
    library, character.only=T))
table = function (..., useNA = 'always') base::table(..., useNA = useNA)

Load UK Biobank dataset

## [1] 502527   5172

This code chunk has been modified for display to hide the 5-digit code that came with our data table

bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)

Load in second UK Biobank dataset

#read.lines("src/components/load_UKBphenotables.r") #how I made this file

bd_join4<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)
bd_join4=as_tibble(bd_join4)
dim(bd_join4)

## [1] 501560   2584

Read in extra files

source("src/components/manyColsToDummy.R")
hormonemeds<-read.csv("src/components/sexHormoneMeds.csv")
source("src/components/cbat.R")
source("src/components/SSRI_Meds.R")
codes<-read.table("src/components/phenocodes.txt", header=F)
QCids<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
withdrawn<-read.csv("src/components/w48818_20220222.csv", header = FALSE)
hormonemeds%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")

code	name
1140869270	“medroxyprogesterone”
1140910674	“ethinylnortestosterone”
1140857656	“methyltestosterone product”
1140865136	“yohimbine/pemoline/methyltestosterone”
1140868532	“testosterone product”
1140857690	“oestradiol 25mg implant 36 week”
1140857700	“oestradiol 1mg/1ml injection”
1141165318	“cetrorelix”
1140917306	“bicalutamide”
1140917310	“casodex 50mg tablet”
1140917448	“oestradiol 1.25g/dose gel”
1140917450	“oestrogel 1.25g gel”
1141171536	“ganirelix”
1140884638	“clomiphene”
1140884634	“cyproterone”
1140870186	“oestrifen 10mg tablet”
1140921100	“triptorelin”
1140868984	“nafarelin”
1140868978	“gestrinone”
1140884544	“leuprorelin”
1140870248	“buserelin”
1140870194	“goserelin”
1141157394	“goserelin product”
1140868882	“gonadorelin”
1141182558	“urofollitropin”
1140868722	“cyclofenil”
1140868580	“norethisterone”
1140869328	“ethinyloestradiol+norethisterone acetate 20mcg/1mg tablet”
1140869356	“mestranol+norethisterone 50micrograms/1mg tablet”
1141157406	“norethisterone product”
1141168324	“oestradiol+norethisterone acetate 1mg/0.5mg tablet”
1141181298	“ethinylestradiol+norethisterone acetate 20mcg/1mg tablet”
1141181204	“ethinylestradiol+norgestimate 35mcg/250mcg tablet”
1141181218	“ethinylestradiol product”
1141181220	“ethinylestradiol”
1141181240	“ethinylestradiol+levonorgestrel 30mcg/150mcg tablet”
1141181286	“ethinylestradiol+desogestrel 20mcg/150mcg tablet”
1140869348	“ethinyloestradiol+norgestimate 35mcg/250mcg tablet”
1140868520	“estracombi tts patch”
1140857628	“gestone 10mg/1ml injection”
1140857704	“ovestin 250micrograms tablet”
1140857706	“oestriol 250micrograms tablet”
1140857708	“quinestradol”
1140857710	“pentovis 250micrograms tablet”
1140857714	“quinestrol”
1140857716	“estrovis 4mg tablet”
1140857736	“virormone 10mg/1ml injection”
1140857912	“desogestrel”
1140857918	“lynoestrenol”
1140857920	“minilyn tablet”
1140858324	“medroxyprogest 80mg/ml suspension 100ml”
1140858332	“sh-420 10mg tablet”
1140858338	“drostanolone propionate”
1140858340	“masteril 100mg/1ml injection”
1140858348	“noltam 10mg tablet”
1140864196	“climagest 1mg tablet”
1140864502	“testotop tts 15mg transdermal patch”
1140868400	“oestriol product”
1140868372	“climaval 1mg tablet”
1140868406	“conjugated oestrogens”
1140868408	“premarin 625micrograms tablet”
1140868420	“piperazine oestrone sulphate”
1140868422	“harmogen 1.5mg tablet”
1140868472	“vagifem 25mcg pessary”
1140868482	“tibolone”
1140868488	“allyloestrenol”
1140868490	“gestanin 5mg tablet”
1140868494	“dydrogesterone”
1140868496	“duphaston 10mg tablet”
1140868508	“cyclo-progynova 1mg tablet”
1140868512	“syntex menophase tablet”
1140868514	“trisequens tablet”
1140868518	“nuvelle tablet”
1140868524	“androcur 50mg tablet”
1140868526	“mesterolone”
1140868528	“pro-viron 25mg tablet”
1140868534	“primoteston depot 250mg/1ml oily injection”
1140868536	“restandol 40mg capsule”
1140868538	“sustanon 100 oily injection”
1140868584	“primolut-n 5mg tablet”
1140868586	“utovlan 5mg tablet”
1140868588	“progesterone product”
1140868590	“cyclogest 200mg suppository”
1140868614	“deca-durabolin 25mg/1ml oily injection”
1140868618	“stanozolol”
1140868620	“stromba 5mg tablet”
1140868628	“humegon 75iu injection (pdr for recon)+solvent”
1140869032	“dienoestrol”
1140869034	“ortho-gynest 500micrograms pessary”
1140869166	“ethinyloestradiol+desogestrel 20mcg/150mcg tablet”
1140869170	“conova 30 tablet”
1140869172	“ethinyloestradiol+ethynodiol diacetate 30mcg/2mg tablet”
1140869174	“eugynon 30 tablet”
1140869176	“logynon tablet”
1140869180	“microgynon 30 tablet”
1140869184	“ovran 30 tablet”
1140869186	“ovranette tablet”
1140869188	“schering pc4 tablet”
1140869190	“trinordiol tablet”
1140869248	“ethinyloestradiol+levonorgestrel 30mcg/150mcg tablet”
1140869254	“binovum tablet”
1140869256	“brevinor tablet”
1140869258	“neocon 1/35 tablet”
1140869260	“norimin tablet”
1140869262	“ovysmen tablet”
1140869264	“synphase tablet”
1140869266	“trinovum tablet”
1140869272	“neogest tablet”
1140869276	“micronor tablet”
1140869278	“noriday tablet”
1140869282	“noristerat 200mg/1ml oily injection”
1140869324	“loestrin 20 tablet”
1140869332	“minulet tablet”
1140869334	“femodene tablet”
1140869338	“tri-minulet tablet”
1140869340	“triadene tablet”
1140869346	“cilest tablet”
1140869352	“norinyl-1 tablet”
1140869354	“ortho-novin 1/50 tablet”
1140869360	“ethynodiol diacetate”
1140869362	“femulen tablet”
1140869366	“levonorgestrel”
1140869368	“microval tablet”
1140869370	“norgeston tablet”
1140870060	“polyestradiol phosphate”
1140870052	“honvan 100mg tablet”
1140870062	“estradurin 40mg injection (pdr for recon)+diluent”
1140870070	“stilboestrol”
1140870076	“apstil 1mg tablet”
1140876638	“cyproterone acetate+ethinyloestradiol”
1140879554	“estramustine”
1140884622	“oestrogen product”
1140884624	“fosfestrol”
1140884626	“mestranol”
1140884688	“hydroxyprogesterone”
1140909906	“estropipate”
1140910562	“gestonorone”
1140921088	“tridestra tablet”
1141157404	“ethinyloestradiol product”
1141157410	“levonorgestrel product”
1141157492	“mestranol product”
1141166196	“etonogestrel”
1141166354	“testoderm 6mg/24hours transdermal patch”
1141166366	“ethinyloestradiol+gestodene 20micrograms/75micrograms tablet”
1141167206	“oestrogel 0.06% gel”
1141177150	“adgyn medro 5mg tablet”
1141177158	“adgyn estro 2mg tablet”
1141177226	“adgyn combi 2mg tablet”
1141179822	“ethinylestradiol+drospirenone 30micrograms/3mg tablet”
1141179820	“drospirenone”
1141180988	“dienestrol”
1141181594	“estriol product”
1141181700	“estradiol product”
1141182794	“desogestrel product”
1141190580	“conjugated oestrogens 0.3mg / medroxyprogesterone 1.5mg tab”
1141192344	“cyproterone acetate+ethinylestradiol”
1141192874	“ethinylestradiol+norelgestromin 600mcg/6mg transdermal patch”
1141193272	“testogel 50mg gel 5g sachet”
1141202030	“estradot 25micrograms patch”
1141181818	“estradiol+norethisterone acetate 1mg/0.5mg tablet”

ssricodes%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")

x
1140921600
1141180212
1140879540
1140867888
1140867878
1140867876
1140867884
1140867888
1140867860

codes%>%kbl()%>%
 scroll_box(width = "200px", height = "200px")

V1	V2
30620	Alanine_aminotransferase
30600	Albumin
30610	Alkaline_phosphatase
30630	Apolipoprotein_A
30640	Apolipoprotein_B
30650	Aspartate_aminotransferase
30710	C-reactive_protein
30680	Calcium
30690	Cholesterol
30700	Creatinine
30720	Cystatin_C
30660	Direct_bilirubin
30730	Gamma_glutamyltransferase
30740	Glucose
30750	Glycated_haemoglobin_(HbA1c)
30760	HDL_cholesterol
30770	IGF-1
30780	LDL_direct
30790	Lipoprotein_A
30800	Oestradiol
30810	Phosphate
30820	Rheumatoid_factor
30830	SHBG
30850	Testosterone
30840	Total_bilirubin
30860	Total_protein
30870	Triglycerides
30880	Urate
30670	Urea
30890	Vitamin_D

Extract columns from main bd dataframe

pheno<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
                    f.189.0.0, f.54.0.0, f.22000.0.0,
                    f.1558.0.0,
                    paste("f.", codes$V1, ".0.0", sep=""),
                    f.22009.0.1:f.22009.0.20)
colnames(pheno)<-c("IID", "Age", "Sex",  
                    "Townsend","Assessment_center", "Geno_batch",
                    "AlcoholFreq",
                    codes$V2,
                    paste("PCA", 1:20,sep=""))

pheno<-as_tibble(pheno)

Extract cols from second UKB dataframe and join the two

pheno2<-bd_join4%>%select(f.eid, f.21001.0.0, 
                          f.20116.0.0,
                          f.3166.0.0)

colnames(pheno2)<-c("IID","BMI", 
                    "SmokeStatus",         
                    "blood_draw_time")


new<-left_join(pheno, pheno2, by="IID")
new<-as_tibble(new)
colnames(new)%>% kbl()%>%
    scroll_box(width = "600px", height = "200px")

x
IID
Age
Sex
Townsend
Assessment_center
Geno_batch
AlcoholFreq
Alanine_aminotransferase
Albumin
Alkaline_phosphatase
Apolipoprotein_A
Apolipoprotein_B
Aspartate_aminotransferase
C-reactive_protein
Calcium
Cholesterol
Creatinine
Cystatin_C
Direct_bilirubin
Gamma_glutamyltransferase
Glucose
Glycated_haemoglobin_(HbA1c)
HDL_cholesterol
IGF-1
LDL_direct
Lipoprotein_A
Oestradiol
Phosphate
Rheumatoid_factor
SHBG
Testosterone
Total_bilirubin
Total_protein
Triglycerides
Urate
Urea
Vitamin_D
PCA1
PCA2
PCA3
PCA4
PCA5
PCA6
PCA7
PCA8
PCA9
PCA10
PCA11
PCA12
PCA13
PCA14
PCA15
PCA16
PCA17
PCA18
PCA19
PCA20
BMI
SmokeStatus
blood_draw_time

dim(new)

## [1] 502527     60

#Fix hyphenated column names
colnames(new)[colnames(new) %in% 
                  c("C-reactive_protein", "Glycated_haemoglobin_(HbA1c)", 
                    "IGF-1")]=
    c("C_reactive_protein","HbA1c","IGF_1")

Remove withdrawn participants

nrow(withdrawn)

## [1] 114

new<-new[!(new$IID %in% withdrawn$V1), ]

QC participants via output of UKB_participantQC.R

nrow(QCids)

## [1] 356980

new<-new[(new$IID %in% QCids$IID),]

Add Vegetarian Columns (output from DefineVeg step)

Veg5yr=as_tibble(read.csv("../VegMarkdown/DefineVeg-09062022.csv"))

Vegadd=Veg5yr%>%select(IID,Veg5yr1)
new<-left_join(new, Vegadd)

## Joining, by = "IID"

new=new%>%filter(!is.na(Veg5yr1))
table(new$Veg5yr1)

## 
##      0      1   <NA> 
## 153047   2328      0

#     0      1 
#153047   2328

Update covariate columns for use in our models

Age²

new$Age2<-new$Age^2

Smoking: Turn 20116 SmokeStatus into two new dummy variables

new$PreviousSmoker=0
new$PreviousSmoker[new$SmokeStatus==1]=1
new$CurrentSmoker=0
new$CurrentSmoker[new$SmokeStatus==2]=1

Alcohol: a binary measure of current drinking for self-reported current drinkers (three or more times a week against less than three times a week). Ref:https://doi.org/10.1038/s41467-019-12424-x

new$AlcoholFreq=mapvalues(new$AlcoholFreq,
                from=c("Prefer not to answer", "Never", "Special occasions only",
                "One to three times a month", "Once or twice a week",
                "Three or four times a week", "Daily or almost daily"),
                to=c(NA,0,0,0,0,1,1))
new$AlcoholFreq=as.numeric(as.character(new$AlcoholFreq))
table(new$AlcoholFreq)%>%kbl()

Var1	Freq
0	77958
1	77327
NA	90

Make dummy 0/1 cols for each assessment center

I didn’t use assessment center but this code is here for our labmates to use.

table(pheno$Assessment_center)

## 
## 10003 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 
##  3793 13939 14058 17878 18647 17195 19433 29409 28321 37002 44198 43012 12582 
## 11013 11014 11016 11017 11018 11020 11021 11022 11023  <NA> 
## 33876 30396 32816 21286 28875 27380 25501  2281   649     0

centers<-unique(new$Assessment_center)
centercols<-paste("center", 1:22, sep="")
new[centercols]<-0

for (i in 1:length(centers)){
    new[new$Assessment_center==centers[i],][centercols[i]]<-1
}

new<-new%>%select(-Assessment_center)

Genotype batch

new$Geno_batch1<-0

new$Geno_batch1[new$Geno_batch>0]<-1
new$Geno_batch<-new$Geno_batch1
new<-new%>%select(-Geno_batch1)

table(new$Geno_batch)%>%kbl()

Var1	Freq
0	15940
1	139435
NA	0

Switch sex factor to numeric

new$Sex<-mapvalues(as.character(new$Sex), 
                     c("Male", "Female"), c(1,2))
new$Sex<-as.numeric(new$Sex)

Z-transformation of blood draw times

new$zblood<-str_split(new$blood_draw_time, " ", simplify = TRUE)[,2] #Take just time
new$zblood<-str_split(new$zblood, ":", simplify=TRUE)[,c(1,2)] #take minutes and seconds

new$zblood[,2]<-as.numeric(new$zblood[,2])/60 #convert minutes to hours

new$zblood<-as.numeric(new$zblood[,1])+as.numeric(new$zblood[,2]) #add together

new$zblood<-(new$zblood - mean(new$zblood, na.rm=TRUE)) / sd(new$zblood, na.rm=TRUE) #standardize

#This column with the colons ":" messes up many softwares so remove it
new=new%>%select(-blood_draw_time)

Z transform Townsend score

new$zTownsend=(new$Townsend - mean(new$Townsend, na.rm=TRUE)) /sd(new$Townsend, na.rm=TRUE)

Z transform Age

new$zAge=(new$Age - mean(new$Age, na.rm=TRUE)) / sd(new$Age, na.rm=TRUE)

log SHBG

new$logSHBG=log(new$SHBG)

Calculate eGFR

CKD-EPI creatinine-cystatin equation (2021)

https://www.kidney.org/content/ckd-epi-creatinine-cystatin-equation-2021

#Creatinine default units umol/L 
#Formula needs mg/dL
#To convert μmol/l to mg/dL, divide by 88.4. 
new$Scr<-new$Creatinine/88.42

#Cystatin C default units: mg/L
#Formula needs mg/L. No conversion necessary
#eGFR: CKD-EPI Creatinine-Cystatin equation: 
new$eGFR<-0 #initialize col
new$eGFR[new$Sex=="1"]<-135*
    (pmin((new$Scr[new$Sex=="1"])/0.9, 1))^(-0.144) *
    (pmax(new$Scr[new$Sex=="1"]/0.9, 1))^(-0.544) * 
    (pmin((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.323) *
    (pmax((new$Cystatin_C[new$Sex=="1"])/0.8, 1))^(-0.778) *
    0.9961^(new$Age[new$Sex=="1"])

new$eGFR[new$Sex=="2"]<-135*
    (pmin((new$Scr[new$Sex=="2"])/0.7, 1))^(-0.219) *
    (pmax(new$Scr[new$Sex=="2"]/0.7, 1))^(-0.544) * 
    (pmin((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.323) *
    (pmax((new$Cystatin_C[new$Sex=="2"])/0.8, 1))^(-0.778) *
    0.9961^(new$Age[new$Sex=="2"]) * 0.963 #for females
#Winsorize
new$eGFR<-Winsorize(new$eGFR, minval = 15, maxval = 200, 
                    probs = c(0,1), type=1)

Add bioavail and free testosterone

#Calculate bioavailable T
cbatoutput<-cbat(new$Testosterone, SHBG = new$SHBG, 
                 ALB = new$Albumin)
new$bioavailableTest<-cbatoutput$bioavailableTest
new$freeTest=cbatoutput$freeTest

Find statin users

statincols<-c(sprintf("f.20003.0.%s", 0:47))
statincodes<-c(1141146234,1141192414,1140910632,1140888594,1140864592,
    1141146138,1140861970,1140888648,1141192410,
    1141188146,1140861958,1140881748,1141200040)

manyColsToDummy(statincodes, bd_join4[,statincols], "statinoutput")
statinoutput$statins<-rowSums(statinoutput) 
statinoutput$statins[statinoutput$statins>1]<-1

statinoutput$IID<-bd_join4$f.eid

statinoutput<-statinoutput%>%select(IID, statins)

new<-left_join(new, statinoutput, by="IID")

Adjust lipid phenotypes

Cholesterol, LDL-C, and Apolipoprotein B¹

sum(new$statins, na.rm=T)

## [1] 22282

new$Cholesterolold=new$Cholesterol
new$LDL_directold=new$LDL_direct
new$Apolipoprotein_Bold=new$Apolipoprotein_B

new$Cholesterol[which(new$statins==1)]<-
    new$Cholesterol[which(new$statins==1)]/0.749

new$LDL_direct[which(new$statins==1)]<-
        new$LDL_direct[which(new$statins==1)]/0.684


new$Apolipoprotein_B[which(new$statins==1)]<-
        new$Apolipoprotein_B[which(new$statins==1)]/0.719

Taking hormone medications

medcols<-c("f.eid", sprintf("f.20003.0.%s", 0:47))

medcodes<-hormonemeds$code #sourced from sexHormoneMeds.csv

names(medcodes)<-hormonemeds$name

manyColsToDummy(medcodes, bd_join4[,medcols], "medoutput")

colnames(medoutput)<-names(medcodes)

medoutput$tookMed<-apply(medoutput, 1, sum, na.rm=TRUE)

nrow(medoutput[medoutput$tookMed>0,])

## [1] 12665

medoutput$tookMed[medoutput$tookMed>1]<-1

medoutput$IID<-bd_join4$f.eid

medoutput2<-medoutput%>%select(IID, tookMed)

new<-left_join(new, medoutput2, by= "IID")

Remove testosterone values for those taking hormone medication

sum(is.na((new$Testosterone)))

## [1] 21032

new$Testosterone[which(!is.na(new$Testosterone) & new$tookMed==1)]<-NA
sum(is.na((new$Testosterone)))

## [1] 24118

sum(is.na((new$bioavailableTest)))

## [1] 33053

new$bioavailableTest[which(!is.na(new$bioavailableTest) & new$tookMed==1)]<-NA
sum(is.na((new$bioavailableTest)))

## [1] 35784

Taking SSRIs

manyColsToDummy(ssricodes, bd_join4[,medcols], "ssrioutput")

colnames(ssrioutput)<-names(ssricodes)

ssrioutput$tookSSRI<-apply(ssrioutput, 1, sum, na.rm=TRUE)

nrow(ssrioutput[ssrioutput$tookSSRI>0,])

## [1] 19309

ssrioutput$tookSSRI[ssrioutput$tookSSRI>1]<-1

ssrioutput$IID<-bd_join4$f.eid

ssrioutput2<-ssrioutput%>%select(IID, tookSSRI)

new<-left_join(new, ssrioutput2, by= "IID")

Indirect rank-based inverse normal transformation (I-INT) (didn’t use in this study)²

Function

inversenormal <- function(x) {
    # inverse normal if you have missing data 
    return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x))))
}

Make RINT transformed columns, add back to original table

phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR,bioavailableTest, freeTest)
 colnames(phenotypes)%>%kbl()%>% 
    scroll_box(width = "600px", height = "200px")

x
Alanine_aminotransferase
Albumin
Alkaline_phosphatase
Apolipoprotein_A
Apolipoprotein_B
Aspartate_aminotransferase
C_reactive_protein
Calcium
Cholesterol
Creatinine
Cystatin_C
Direct_bilirubin
Gamma_glutamyltransferase
Glucose
HbA1c
HDL_cholesterol
IGF_1
LDL_direct
Lipoprotein_A
Oestradiol
Phosphate
Rheumatoid_factor
SHBG
Testosterone
Total_bilirubin
Total_protein
Triglycerides
Urate
Urea
Vitamin_D
eGFR
bioavailableTest
freeTest

rint=as_tibble(sapply(phenotypes, inversenormal))
colnames(rint)=paste(colnames(rint), "rint", sep="_")
rint$IID<-new$IID

new<-left_join(new, rint, by="IID")

Write output

outdir="pheno"
suppressWarnings(dir.create(outdir))
new$FID<-new$IID

new<-new%>%select(FID, everything())

participants<-new%>%select(FID,IID)

write.table(new,
    paste(outdir, "/BioxVeg_pheno_09112022.txt", sep=""),
    row.names=FALSE, quote=FALSE)

write.csv(new,
        paste(outdir, "/BioxVeg_pheno_09112022.csv", sep=""),
        row.names=FALSE, quote=FALSE)

Westerman 2022:for those with self-reported use of a statin medication, each of these biomarkers was divided by an adjustment factor (0.749, 0.684, and 0.719, respectively) that had been empirically estimated by Sinnott-Armstrong and colleagues in the same population.↩︎

Calculate residuals

resdat<-matrix(NA,nrow(new),length(phenotypes))
colnames(resdat)<-phenotypes

for (p in 1:length(phenotypes)){
    assign(paste("lm", phenotypes[p], sep="_"), 
           lm(new[[phenotypes[p]]] ~ 
                Age,Sex, zTownsend, 
                #ADD COVARIATES HERE
              data=new, 
              na.action=na.exclude))
    lmname<-paste("lm", phenotypes[p], sep="_")
    lmobj<-get(lmname)
    resdat[,p]<-resid(lmobj)
}

resdat<-as_tibble(resdat)
colnames(resdat)<-paste(phenotypes, "res", sep="_")

resdat_inv<-matrix(NA,nrow(new),length(phenotypes))
colnames(resdat_inv)<-paste(phenotypes, "resinv", sep="_")

inversenormal <- function(x) { 
    # inverse normal if you have missing data 
    return(qnorm((rank(x,na.last="keep", ties.method="random")-0.5)/sum(!is.na(x)))) 
} 

for (i in 1:length(phenotypes)){
    resdat_inv[,i] <- inversenormal(resdat[,i])
}

resdat_inv<-as_tibble(as.data.frame(resdat_inv))
resdat_inv$IID<-new$IID

new1<-left_join(new, resdat_inv, by="IID")

### 

### Direct rank-based inverse normal transformation (D-INT)

#### Phenotypes

phenotypes=new%>%select(Alanine_aminotransferase:Vitamin_D, eGFR, 
                    bioavailableTest)
colnames(phenotypes)

↩︎

Phenotype QC

Mike Francis

2022-10-09 21:41:24

Prepare the phenotype table for biomarker traits analyzed in this study.

Load packages + make it so table always displays NA’s

Load UK Biobank dataset

Load in second UK Biobank dataset

Read in extra files

Extract columns from main bd dataframe

Extract cols from second UKB dataframe and join the two

Remove withdrawn participants

QC participants via output of UKB_participantQC.R

Add Vegetarian Columns (output from DefineVeg step)

Update covariate columns for use in our models

Make dummy 0/1 cols for each assessment center

Genotype batch

Switch sex factor to numeric

Z-transformation of blood draw times

Z transform Townsend score

Z transform Age

log SHBG

Calculate eGFR

Add bioavail and free testosterone

Find statin users

Adjust lipid phenotypes

Taking hormone medications

Remove testosterone values for those taking hormone medication

Taking SSRIs

Indirect rank-based inverse normal transformation (I-INT) (didn’t use in this study)²

Function

Make RINT transformed columns, add back to original table

Write output

Calculate residuals

Phenotype QC

Mike Francis

2022-10-09 21:41:24

Prepare the phenotype table for biomarker traits analyzed in this study.

Load packages + make it so table always displays NA’s

Load UK Biobank dataset

Load in second UK Biobank dataset

Read in extra files

Extract columns from main bd dataframe

Extract cols from second UKB dataframe and join the two

Remove withdrawn participants

QC participants via output of UKB_participantQC.R

Add Vegetarian Columns (output from DefineVeg step)

Update covariate columns for use in our models

Make dummy 0/1 cols for each assessment center

Genotype batch

Switch sex factor to numeric

Z-transformation of blood draw times

Z transform Townsend score

Z transform Age

log SHBG

Calculate eGFR

Add bioavail and free testosterone

Find statin users

Adjust lipid phenotypes

Taking hormone medications

Remove testosterone values for those taking hormone medication

Taking SSRIs

Indirect rank-based inverse normal transformation (I-INT) (didn’t use in this study)2

Function

Make RINT transformed columns, add back to original table

Write output

Calculate residuals

Indirect rank-based inverse normal transformation (I-INT) (didn’t use in this study)²