Find the self-identified vegetarians in the UK Biobank dietary recall survey instances
Assess their consistency with other parts of the dietary survey.
Obtain a list of high quality vegetarians to use for subsequent analyses.
Veg5yr1 is the vegetarian variable used in the main analysis. Being categorized as a vegetarian means participants adhered to all four of these criteria:
Self-identified as a vegetarian or vegan the first time they were asked on a 24HR survey
Didn’t indicate eating any meat or fish on that same first 24HR survey
Indicated no major dietary changes over past 5 years
No meat or fish eating indicated on initial assessment
The biomarker data that we use in this analysis was taken at the IA. Therefore this is a combination of several columns that describe the quality of vegetarianism at that same time point.
There are also some plots here showing general qualities of the 24HR recall survey that were done in my data exploration that don’t factor into the main analysis (i.e., Plot: How many repeat participants were in each instance of the 24HR? , Plot: Percent consistent vegetarians vs number of recall survey’s taken across 3 years, [Identify those who were vegetarian at two consecutive 24HR instances])
suppressMessages(silent <- lapply(
c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr","kableExtra","cowplot"),
character.only=T))
library,
= function (..., useNA = 'always') base::table(..., useNA = useNA)
table
<- function(data, ...) {
mutate_when <- eval(substitute(alist(...)))
dots for (i in seq(1, length(dots), by = 2)) {
<- eval(dots[[i]], envir = data)
condition <- eval(dots[[i + 1]],
mutations envir = data[condition, , drop = FALSE])
names(mutations)] <- mutations
data[condition,
}
data }
## [1] 502527 5172
=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
bd=as_tibble(bd)
bdsource("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbXXXXX_loaddata.R" without the loading part, to label the responses in survey questions
dim(bd)
“daycols” are days of the week that the 24HR was taken, i.e. a proxy for whether the survey was completed. In these columns, NA=survey wasn’t taken that instance.
We change the days of the week to 1 because we’re not interested in which day it actually was.
#First, remove withdrawn participants
=read.csv("src/components/w48818_20220222.csv", header = FALSE)
withdrawnnrow(withdrawn)
## [1] 114
=bd[!(bd$f.eid %in% withdrawn$V1), ]
bdnrow(bd)
## [1] 502413
#Identify columns with days of the week
<-c("f.20080.0.0", "f.20080.1.0", "f.20080.2.0",
daycols"f.20080.3.0", "f.20080.4.0")
<-bd[,c("f.eid", daycols)]
bd1#Change all the factors to characters
<-sapply(bd1[,daycols], as.character)
bd1[,daycols]
#Change NA's to zeros and days to 1's
is.na(bd1[,daycols]))]<-"0"
bd1[,daycols][(!="0"] <-"1"
bd1[,daycols][bd1[,daycols]
#Change these back to numeric
<-sapply(bd1[,daycols], as.integer)
bd1[,daycols]
#Now make a new column, everyone with rowSums zero never took recall
#and those with >0 took it at least once
$times_taken_24HR<-rowSums(bd1[,daycols])
bd1$ever_took_24HR<-0
bd1$ever_took_24HR[bd1$times_taken_24HR>0]<-1
bd1$times_taken_24HR[bd1$times_taken_24HR==0]<-NA
bd1
sum(bd1$ever_took_24HR)
## [1] 210967
#TP = time point
colnames(bd1)<-c("IID", "RecallTP0", "RecallTP1",
"RecallTP2","RecallTP3","RecallTP4",
"times_taken_24HR", "ever_took_24HR")
#Optional save here
#write.csv(bd1, "UKB_24hourRecall-ParticipantInstancesTaken.csv", quote=F, row.names=F)
During what time periods did the questionnaire cycles occur?
=bd1%>%select(RecallTP0:RecallTP4)
perinstance
<-bd1%>%select(times_taken_24HR)%>%
timestakengroup_by(times_taken_24HR)%>%summarise(N=n())
%>%kbl() timestaken
times_taken_24HR | N |
---|---|
1 | 84145 |
2 | 48108 |
3 | 42482 |
4 | 30467 |
5 | 5765 |
NA | 291446 |
=ggplot(data=timestaken, aes(x=times_taken_24HR, y=N)) +
p1geom_bar(stat="identity", width=0.5, fill ="#8C8C8C")+
ylab("Number of participants")+
xlab("Number of 24 hour recall surveys taken")+
ggtitle(paste("N = ", sum(timestaken$N[1:5]), sep=""))+
theme_bw()
p1
## Warning: Removed 1 rows containing missing values (position_stack).
=bd1[paste("RecallTP", 0:4, sep="")]
take=take[!rowSums(take)==0,]
take
=take%>%
take.plotgather(key = v, value = total, RecallTP0:RecallTP4, na.rm = T)%>%
group_by(v)%>%count(total)%>%filter(total==1)
colnames(take.plot)=c("RecallTP", "Group", "n")
$RecallTP=0:4
take.plotas_tibble(take.plot)%>%kbl()
RecallTP | Group | n |
---|---|---|
0 | 1 | 70692 |
1 | 1 | 100577 |
2 | 1 | 83243 |
3 | 1 | 103765 |
4 | 1 | 100223 |
This next script chunk is incredibly repetitive and I’m sure there is a more efficient way to write it but it was faster for me to just write it “manually”.
=paste("repeat", 0:4, sep="")
rep=0
take[rep]1]]=0
take[rep[nrow(take[!rowSums(take)==0,])
## [1] 210967
#TP1, previously took once
$repeat1[take$RecallTP0==1 &
take$RecallTP1==1] = 1
take
#TP2, previously took at least once
$repeat2[(take$RecallTP0==1 |
take$RecallTP1==1) &
take$RecallTP2==1] = 1
take
#TP2, previously took twice
$repeat2[(take$RecallTP0==1 &
take$RecallTP1==1) &
take$RecallTP2==1] = 2
take
#TP3, previously took at least once
$repeat3[(take$RecallTP0==1 |
take$RecallTP1==1|
take$RecallTP2==1) &
take$RecallTP3==1] = 1
take
#TP3, previously took at least twice
$repeat3[(
take$RecallTP0==1 & take$RecallTP1==1) |
(take$RecallTP1==1 & take$RecallTP2==1) |
(take$RecallTP0==1 & take$RecallTP2==1)
(take& take$RecallTP3==1] = 2
)
#TP3, previously took three times
$repeat3[(
take$RecallTP0==1 &
take$RecallTP1==1 &
take$RecallTP2==1 &
take$RecallTP3==1) ] = 3
take
#TP4, previously took at least once
$repeat4[(take$RecallTP0==1 |
take$RecallTP1==1|
take$RecallTP2==1|
take$RecallTP3==1) &
take$RecallTP4==1] = 1
take
#TP4, previously took at least twice
$repeat4[(
take$RecallTP0==1 & take$RecallTP1==1) |
(take$RecallTP0==1 & take$RecallTP2==1) |
(take$RecallTP0==1 & take$RecallTP3==1) |
(take$RecallTP1==1 & take$RecallTP2==1) |
(take$RecallTP1==1 & take$RecallTP3==1) |
(take$RecallTP2==1 & take$RecallTP3==1)
(take& take$RecallTP4==1] = 2
)
#TP4, previously took at least three times
$repeat4[(
take$RecallTP0==1 & take$RecallTP1==1 & take$RecallTP2==1) |
(take$RecallTP0==1 & take$RecallTP2==1 & take$RecallTP3==1) |
(take$RecallTP1==1 & take$RecallTP2==1 & take$RecallTP3==1)
(take& take$RecallTP4==1] = 3
)
#TP4, previously took four times
$repeat4[
take$RecallTP0==1 & take$RecallTP1==1
take& take$RecallTP2==1 & take$RecallTP3==1
& take$RecallTP4==1] = 4
#NA for those who didn't take the recall in that instance
$repeat0[take$RecallTP0==0]=NA
take$repeat1[take$RecallTP1==0]=NA
take$repeat2[take$RecallTP2==0]=NA
take$repeat3[take$RecallTP3==0]=NA
take$repeat4[take$RecallTP4==0]=NA
take
sapply(take, table)[6:10] #Count repeats at each instance
## $repeat0
##
## 0 <NA>
## 70692 140275
##
## $repeat1
##
## 0 1 <NA>
## 79599 20978 110390
##
## $repeat2
##
## 0 1 2 <NA>
## 27105 46006 10132 127724
##
## $repeat3
##
## 0 1 2 3 <NA>
## 19639 38856 37764 7506 107202
##
## $repeat4
##
## 0 1 2 3 4 <NA>
## 13932 20982 34122 25422 5765 110744
=data.frame(0:4, sapply(take, sum)[1:5], row.names = NULL)
take.plotcolnames(take.plot)=c("RecallTP", "N")
=data.frame()
yfor (i in 0:4){
=take%>%group_by_(paste("RecallTP", i,sep=""), paste("repeat",i,sep=""))%>%
xsummarise(count=n())%>%
rename_("RecallTP" = paste("RecallTP", i,sep=""))%>%
rename_("group" = paste("repeat", i,sep=""))
=x[x$RecallTP==1,]
x$RecallTP=i
x=rbind(y,x)
y }
## `summarise()` has grouped output by 'RecallTP0'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'RecallTP1'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'RecallTP2'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'RecallTP3'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'RecallTP4'. You can override using the `.groups` argument.
$group=mapvalues(y$group, from=c(0,1,2,3,4),
yto=c("First time taking",
"Second time taking",
"Third time taking",
"Fourth time taking",
"Fifth time taking"))
$group=factor(y$group,
ylevels =rev(unique(y$group)))
=ggplot(data=y, aes(x=RecallTP, y=count, fill=group, color=group)) +
p2geom_bar(stat="identity", width=0.5, size=1)+
ylab("Count participants")+
xlab("Instance of 24 hour recall")+
theme_bw()+
guides(fill = guide_legend(reverse = TRUE),
color=guide_legend(reverse = TRUE))+
theme(legend.title=element_blank())+
scale_fill_brewer(palette="Spectral")+
scale_colour_manual(guide="none", values=c(rep("NA", 4), "black"))
p2
<-bd1
new
#PUT AGE AND SEX COLUMNS
<-bd%>%select(f.eid, f.21003.0.0, f.31.0.0)
agesexcolnames(agesex)<-c("IID", "Age", "Sex")
<-left_join(new,agesex)%>%select(IID, Age, Sex, everything()) new
## Joining, by = "IID"
f.1538 = “Have you made any major changes to your diet in the last 5 years?”
0 = “No”
1= “Yes, because of illness | Yes, because of other reasons | Prefer not to answer”
=bd%>%select(f.eid, f.1538.0.0)
changevarcolnames(changevar)=c("IID", "DietChanges5yr")
2]<-sapply(changevar[2], mapvalues,
changevar[c("Prefer not to answer",
"Yes, because of illness", "Yes, because of other reasons",
"No"),
c(1,1,1,0))
table(changevar$DietChanges5yr)%>%kbl()
Var1 | Freq |
---|---|
0 | 303424 |
1 | 198092 |
NA | 897 |
=left_join(new,changevar) new
## Joining, by = "IID"
#Category 100052: Diet. Initial assessment
<-c("f.1329.0.0", "f.1339.0.0",
IAcols"f.1349.0.0", "f.1359.0.0",
"f.1369.0.0", "f.1379.0.0",
"f.1389.0.0")
names(IAcols)<-c("IAOilyfish", "IANonoilyfish",
"IAProcessedmeat","IAPoultry",
"IABeef","IALamb",
"IAPork")
<-as_tibble(sapply(bd[IAcols], as.character))
IAcols.newcolnames(IAcols.new)<-names(IAcols)
#Put a TRUE in this column IAveg if they answered Never in all the meat+fish columns
$IAveg<-rowSums(bd[IAcols]=="Never")==7
IAcols.new
table(IAcols.new$IAveg)%>%kbl()
Var1 | Freq |
---|---|
FALSE | 492390 |
TRUE | 9126 |
NA | 897 |
$IID<-bd$f.eid
IAcols.new<-left_join(new, IAcols.new) new
## Joining, by = "IID"
Add the column “Age when last ate meat”, which wasn’t used in the analysis.
# IA Age when last ate meat/"Never eaten meat" -----------------------
#https://biobank.ndph.ox.ac.uk/ukb/field.cgi?id=3680
#"How old were you when you last ate any kind of meat?
#(Enter "0" if you have never eaten meat in your lifetime)"
#on the initial assessment
<-bd%>%select(f.eid, f.3680.0.0)
IA.ageLastAteMeatcolnames(IA.ageLastAteMeat)<-c("IID", "IAageLastAteMeat")
<-left_join(new, IA.ageLastAteMeat) new
## Joining, by = "IID"
=new%>%select(IAageLastAteMeat, IAOilyfish:IAPork)%>%filter(IAageLastAteMeat==0)
neverate
sapply(neverate, table)
## $IAageLastAteMeat
##
## 0 <NA>
## 1230 0
##
## $IAOilyfish
##
## 2-4 times a week 5-6 times a week Do not know
## 28 5 7
## Less than once a week Never Once a week
## 49 1098 40
## Once or more daily <NA>
## 3 0
##
## $IANonoilyfish
##
## 2-4 times a week 5-6 times a week Do not know
## 44 3 6
## Less than once a week Never Once a week
## 44 1069 59
## Once or more daily Prefer not to answer <NA>
## 4 1 0
##
## $IAProcessedmeat
##
## Never <NA>
## 1230 0
##
## $IAPoultry
##
## Never <NA>
## 1230 0
##
## $IABeef
##
## Never <NA>
## 1230 0
##
## $IALamb
##
## Never <NA>
## 1230 0
##
## $IAPork
##
## Never <NA>
## 1230 0
=c("Once or more daily","5-6 times a week","2-4 times a week",
lev"Once a week","Less than once a week",
"Do not know", "Never")
$IAOilyfish<-factor(neverate$IAOilyfish, levels=lev)
neverate$IANonoilyfish<-factor(neverate$IANonoilyfish, levels=lev)
neverate<-neverate%>%group_by(IAOilyfish)%>%summarise(N=n())%>%mutate(Category="Oily fish")
x1<-neverate%>%group_by(IANonoilyfish)%>%summarise(N=n())%>%mutate(Category="Non-oily fish")
x2colnames(x1)[1]="Frequency"
colnames(x2)[1]="Frequency"
<-rbind(x1, x2)
neverate.plot<-neverate.plot[-15,] #take out the "prefer not to answer" 1 person
neverate.plot
ggplot(data=neverate.plot, aes(x=Category, y=N, fill=Frequency)) +
geom_bar(position="stack", stat="identity", width=0.5)+
ylab("Number of participants")+
ggtitle(paste('Fish eating frequency of those who, on the same survey,
Answered 0 to:
"How old were you when you last ate any kind of meat?
(Enter "0" if you have never eaten meat in your lifetime)"
N=', nrow(neverate)-1, sep=""))+
theme_bw()+
scale_fill_brewer(palette="Spectral")
This next section solves the problem that if someone has an NA in field 20086 (24hr-recall: special diet followed), it could mean two possible things:
They did not answer this question in the affirmative or
They didn’t take the 24h recall in that instance,
since it is marked as NA in both of these situations. (I.e. there is no “I didn’t follow a special diet” response in UKB Data-Coding 76).
Therefore response in this column was compared to the column indicating whether they took the survey in that instance to get that information.1
The question on the survey:
<-c("f.eid", "f.20086.0.0", "f.20086.1.0",
Special"f.20086.2.0", "f.20086.3.0", "f.20086.4.0")
names(Special)<- c("IID", "SpecialVeg0","SpecialVeg1", "SpecialVeg2",
"SpecialVeg3", "SpecialVeg4")
<-as_tibble(sapply(bd[Special], as.character))
Special.colscolnames(Special.cols)<-names(Special)
The array index for f.20086 runs from 0 to 5 (https://biobank.ndph.ox.ac.uk/ukb/field.cgi?id=20086). This handy piece of code will consolidate anyone who said vegetarian in indices 1:5 and put that information in array 0 of the corresponding survey instance so we don’t have to worry about the additional array slots for those people that had two special diets.
It also seems that at instance 0 UKB had potentially not enabled this “multiple special diets” feature as f.20086.0.1:f.20086.0.5 are all empty in all participants. However in instances 1:4 there are many participants who indicated multiple special diets.
#to see that f.20086.0.1:f.20086.0.5 are all empty:
#sapply(bd%>%select(f.20086.0.1:f.20086.0.5), table)
#For vegetarians
for (i in 0:4){
=(sprintf("f.20086.%1.0f.%1.0f", i,1:5))
cols.i#cols.0=(sprintf("f.20086.%1.0f.%1.0f", i,0))
sprintf("SpecialVeg%1.0f", i)][
Special.cols[apply(bd[cols.i], 1,
function(x) any(x %in% c("Vegetarian"))),
="Vegetarian"
]
}
#For vegans
for (i in 0:4){
=(sprintf("f.20086.%1.0f.%1.0f", i,1:5))
cols.i#cols.0=(sprintf("f.20086.%1.0f.%1.0f", i,0))
sprintf("SpecialVeg%1.0f", i)][
Special.cols[apply(bd[cols.i], 1,
function(x) any(x %in% c("Vegan"))),
="Vegan"
] }
Vegetarian = 1 ; Vegan = 1; all others and NA get 0 here, Will switch “true NAs” who didn’t take the survey at this instance back to NA in next step
=Special.cols
Special.veg2:6]<-sapply(Special.cols[2:6], mapvalues,
Special.veg[c(NA, "Low calorie", "Gluten-free",
"Lactose-free", "Other",
"Vegetarian", "Vegan"),
c(0,0,0,0,0,1,1))
<-as_tibble(sapply(Special.veg, as.numeric))
Special.veg<-left_join(new, Special.veg) new
## Joining, by = "IID"
Switch those zeros in Special Veg columns back to NAs for those who didn’t take survey at that instance
for (i in 0:4){
paste("SpecialVeg", i, sep="")][
new[paste("RecallTP", i, sep="")]==0
new[=NA
] }
Make separate column indicating self-identified vegans
<-Special.cols
Special.vegan2:6]<-sapply(Special.cols[2:6], mapvalues,
Special.vegan[c(NA, "Low calorie", "Gluten-free",
"Lactose-free", "Other",
"Vegetarian", "Vegan"),
c(0,0,0,0,0,0,1))
colnames(Special.vegan)=c("IID", "SpecialVegan0","SpecialVegan1",
"SpecialVegan2",
"SpecialVegan3", "SpecialVegan4")
<-as_tibble(sapply(Special.vegan, as.numeric))
Special.vegan<-left_join(new, Special.vegan) new
## Joining, by = "IID"
#Switch those zeros in Special Veg columns back to NAs
#for those who didn't take survey at that instance
for (i in 0:4){
paste("SpecialVegan", i, sep="")][
new[paste("RecallTP", i, sep="")]==0
new[=NA
] }
Find proportion of times each participant answered vegetarian on 24H
$SpecialTimesVeg=ifelse(
newapply(
is.na(new[paste("SpecialVeg", 0:4, sep="")]),1,all),NA,
rowSums(new[paste("SpecialVeg", 0:4, sep="")], na.rm = T
))
#Proportion of times taken 24HR that answered vegetarian
$SpecialPropVeg=
new$SpecialTimesVeg/new$times_taken_24HR new
Tracking the percentage of participants who self-identified as vegetarian every time they took the 24HR (100% of responses were vegetarian/vegan) versus the number of times the recall was taken by said participants
#People who said they were vegetarian at least once
<-new%>%select(times_taken_24HR, SpecialTimesVeg)%>%
timesfilter(complete.cases(.), SpecialTimesVeg>0)
=nrow(times)
nveg
<-matrix(data=NA, nrow=5, ncol=4)
times.resfor (i in 1:5){
1]<-i
times.res[i,2]<-nrow(times%>%filter(times_taken_24HR==i))
times.res[i,3]<-nrow(times%>%filter(SpecialTimesVeg==i,
times.res[i,==i))
times_taken_24HR4]<-(times.res[i,3]/times.res[i,2])*100
times.res[i,
}
colnames(times.res)<-c("Times24HR",
"N took 24HR this many times and said vegetarian at least once on a survey",
"N who took 24HR this many times said Veg in every 24HR they took",
"PercentVeg")
<-as.data.frame(times.res)
times.res$Times24HR<-as.factor(times.res$Times24HR)
times.res
%>%kbl() times.res
Times24HR | N took 24HR this many times and said vegetarian at least once on a survey | N who took 24HR this many times said Veg in every 24HR they took | PercentVeg |
---|---|---|---|
1 | 2781 | 2781 | 100.00000 |
2 | 2130 | 1380 | 64.78873 |
3 | 2162 | 1229 | 56.84551 |
4 | 1672 | 925 | 55.32297 |
5 | 370 | 168 | 45.40541 |
=c("#36E072", "#2EC062", "#27A152", "#1F8141", "#176131")
greens
=ggplot(data=times.res, aes(x=Times24HR,
p3y=PercentVeg, fill=Times24HR))+
geom_bar(stat="identity", width=0.5)+
ggtitle(paste("N=", nveg,"participants who indicated
being vegetarian/vegan at least once on
a 24HR dietary assessment", sep=" "))+
xlab("Number of 24 hour recall surveys taken")+
ylab("Vegetarian/vegan consistently
across all surveys (%)")+
theme_bw()+
scale_fill_manual(values=greens, guide="none")
p3
(First 24HR is called “next” in the script, as in next after the initial assessment).
=new%>%select(SpecialVeg0:SpecialVeg4)
specialVeg
#Find positions of all NA's
=apply(specialVeg, 1,FUN = function(x) which(!is.na(x)),
NonNAindexsimplify = T)
#Take first non-NA value
$NonNAindex1=sapply(NonNAindex,"[",1)
specialVeghead(specialVeg)%>%kbl()
SpecialVeg0 | SpecialVeg1 | SpecialVeg2 | SpecialVeg3 | SpecialVeg4 | NonNAindex1 |
---|---|---|---|---|---|
NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA |
0 | NA | NA | NA | NA | 1 |
NA | NA | NA | NA | NA | NA |
NA | NA | NA | 0 | 0 | 4 |
NA | 0 | 0 | NA | 0 | 2 |
#Find the first time they answered the vegetarianism question:
$nextSpecialVeg=0
specialVeg
#this loop takes a long time but couldn't figure out how to do it with vectors
for (i in 1:nrow(specialVeg)){
if(!is.na(specialVeg$NonNAindex1[i])){
#If the first non-NA special Veg = 1, set this to 1
if (specialVeg[i, specialVeg$NonNAindex1[i]]==1)
$nextSpecialVeg[i]=1}
{specialVeg#else set to zero
else {specialVeg$nextSpecialVeg[i]=0}
}#Else set to NA
else{specialVeg$nextSpecialVeg[i]=NA}
}
#Add to main table
$nextSpecialVeg=specialVeg$nextSpecialVeg
new
%>%filter(SpecialTimesVeg>0)%>%select(SpecialVeg0:SpecialVeg4, nextSpecialVeg)%>%head(20)%>%kbl()%>%row_spec(18, background="orange") new
SpecialVeg0 | SpecialVeg1 | SpecialVeg2 | SpecialVeg3 | SpecialVeg4 | nextSpecialVeg |
---|---|---|---|---|---|
NA | NA | NA | NA | 1 | 1 |
NA | 1 | NA | NA | 1 | 1 |
NA | 1 | NA | NA | NA | 1 |
NA | 1 | 1 | 1 | NA | 1 |
NA | 1 | NA | NA | NA | 1 |
NA | 1 | 1 | 1 | 1 | 1 |
NA | 1 | NA | 1 | 1 | 1 |
NA | NA | NA | NA | 1 | 1 |
1 | 1 | 1 | 0 | 1 | 1 |
NA | 1 | 1 | NA | 1 | 1 |
NA | NA | NA | NA | 1 | 1 |
NA | 1 | NA | NA | 1 | 1 |
NA | 1 | 0 | 1 | 1 | 1 |
1 | NA | NA | NA | 1 | 1 |
NA | 1 | 1 | NA | 1 | 1 |
NA | NA | 1 | 1 | NA | 1 |
NA | NA | NA | NA | 1 | 1 |
0 | 0 | 1 | NA | NA | 0 |
1 | 0 | 0 | NA | NA | 1 |
1 | NA | NA | NA | NA | 1 |
#Category 100106: Meat/fish yesterday. 24HR.
<-bd%>%select(f.103000.0.0, f.103000.1.0, f.103000.2.0,
Meatfish24103000.3.0, f.103000.4.0,
f.103140.0.0, f.103140.1.0, f.103140.2.0,
f.103140.3.0, f.103140.4.0)
f.
colnames(Meatfish24)<-c("Meat0", "Meat1","Meat2",
"Meat3","Meat4",
"Fish0","Fish1","Fish2",
"Fish3","Fish4")
<-as_tibble(sapply(Meatfish24,
Meatfish24c("No", "Yes"), c(0, 1)))
mapvalues, <-as_tibble(sapply(Meatfish24, as.numeric))
Meatfish24 Meatfish24
## # A tibble: 502,413 × 10
## Meat0 Meat1 Meat2 Meat3 Meat4 Fish0 Fish1 Fish2 Fish3 Fish4
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA
## 3 0 NA NA NA NA 0 NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA 1 1 NA NA NA 1 0
## 6 NA 0 1 NA 1 NA 0 0 NA 0
## 7 NA NA 0 NA NA NA NA 0 NA NA
## 8 NA 1 NA 0 NA NA 1 NA 1 NA
## 9 NA NA NA NA NA NA NA NA NA NA
## 10 NA NA NA NA 0 NA NA NA NA 0
## # … with 502,403 more rows
$Meatfish24Sum<-ifelse(
Meatfish24apply(
is.na(Meatfish24),1,all),NA,
rowSums(Meatfish24, na.rm = T)
)
$IID<-bd$f.eid
Meatfish24<-left_join(new, Meatfish24) new
## Joining, by = "IID"
$MeatFish24Veg<-NA
new$MeatFish24Veg[new$Meatfish24Sum==0]<-1
newtable(new$MeatFish24Veg)
##
## 1 <NA>
## 18631 483782
=new%>%select(Meat0:Meat4)
meat2=new%>%select(Fish0:Fish4)
fish2
#Find positions of all NA's
#NonNAindex strategy same as above, identical for Special, Meat and Fish cols.
$NonNAindex1=sapply(NonNAindex,"[",1)
meat2$NonNAindex1=sapply(NonNAindex,"[",1)
fish2
# Just the first 24HR time point -------------------------------------
$nextMeat24=0
meat2#0= didn't eat meat. 1 = ate meat
for (i in 1:nrow(meat2)){
if(!is.na(meat2$NonNAindex1[i])){
if (meat2[i, meat2$NonNAindex1[i]]==1){meat2$nextMeat24[i]=1}
else {meat2$nextMeat24[i]=0}
}else{meat2$nextMeat24[i]=NA}
}
$nextFish24=0
fish2#0= didn't eat fish. 1 = ate fish
for (i in 1:nrow(fish2)){
if(!is.na(fish2$NonNAindex1[i])){
if (fish2[i, fish2$NonNAindex1[i]]==1){fish2$nextFish24[i]=1}
else {fish2$nextFish24[i]=0}
}else{fish2$nextFish24[i]=NA}
}
#Add to main table
$nextMeat24=meat2$nextMeat24
new$nextFish24=fish2$nextFish24 new
#ate<-dat[c(17:21, 35:44)]
=new%>%select(SpecialVeg0:SpecialVeg4, Meat0:Fish4)
ate
#initialize four categories as columns in data frame "ate"
<-paste("vegAteMeatOnly", 0:4, sep="")
Vegatemeat<-NA
ate[Vegatemeat]<-paste("vegAteFishOnly", 0:4, sep="")
Vegatefish<-NA
ate[Vegatefish]<-paste("vegAteMeatAndFish", 0:4, sep="")
Vegatemeatandfish<-NA
ate[Vegatemeatandfish]<-paste("vegDidnt", 0:4, sep="")
Vegdidnt<-NA
ate[Vegdidnt]
for (i in 0:4){
#Loop over instances 0:4 of 24HR survey
print(paste(sum(ate[paste("SpecialVeg", i, sep="")]==1, na.rm=T), "self-identified veg at tp", i))
paste("vegAteMeatOnly", i, sep="")][
ate[paste("Meat", i, sep="")]==1 &
ate[paste("Fish", i, sep="")]==0 &
ate[paste("SpecialVeg", i, sep="")]==1
ate[=1
]
paste("vegAteFishOnly", i, sep="")][
ate[paste("Meat", i, sep="")]==0 &
ate[paste("Fish", i, sep="")]==1 &
ate[paste("SpecialVeg", i, sep="")]==1
ate[=1
]
paste("vegAteMeatAndFish", i, sep="")][
ate[paste("Meat", i, sep="")]==1 &
ate[paste("Fish", i, sep="")]==1 &
ate[paste("SpecialVeg", i, sep="")]==1
ate[=1
]
paste("vegDidnt", i, sep="")][
ate[paste("Meat", i, sep="")]==0 &
ate[paste("Fish", i, sep="")]==0 &
ate[paste("SpecialVeg", i, sep="")]==1
ate[=1
] }
## [1] "2515 self-identified veg at tp 0"
## [1] "3985 self-identified veg at tp 1"
## [1] "3241 self-identified veg at tp 2"
## [1] "4050 self-identified veg at tp 3"
## [1] "3883 self-identified veg at tp 4"
colnames(ate)
## [1] "SpecialVeg0" "SpecialVeg1" "SpecialVeg2"
## [4] "SpecialVeg3" "SpecialVeg4" "Meat0"
## [7] "Meat1" "Meat2" "Meat3"
## [10] "Meat4" "Fish0" "Fish1"
## [13] "Fish2" "Fish3" "Fish4"
## [16] "vegAteMeatOnly0" "vegAteMeatOnly1" "vegAteMeatOnly2"
## [19] "vegAteMeatOnly3" "vegAteMeatOnly4" "vegAteFishOnly0"
## [22] "vegAteFishOnly1" "vegAteFishOnly2" "vegAteFishOnly3"
## [25] "vegAteFishOnly4" "vegAteMeatAndFish0" "vegAteMeatAndFish1"
## [28] "vegAteMeatAndFish2" "vegAteMeatAndFish3" "vegAteMeatAndFish4"
## [31] "vegDidnt0" "vegDidnt1" "vegDidnt2"
## [34] "vegDidnt3" "vegDidnt4"
=ate[16:35]
ate ate
## # A tibble: 502,413 × 20
## vegAteMeatO…¹ vegAt…² vegAt…³ vegAt…⁴ vegAt…⁵ vegAt…⁶ vegAt…⁷ vegAt…⁸ vegAt…⁹
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA
## 7 NA NA NA NA NA NA NA NA NA
## 8 NA NA NA NA NA NA NA NA NA
## 9 NA NA NA NA NA NA NA NA NA
## 10 NA NA NA NA NA NA NA NA NA
## # … with 502,403 more rows, 11 more variables: vegAteFishOnly4 <dbl>,
## # vegAteMeatAndFish0 <dbl>, vegAteMeatAndFish1 <dbl>,
## # vegAteMeatAndFish2 <dbl>, vegAteMeatAndFish3 <dbl>,
## # vegAteMeatAndFish4 <dbl>, vegDidnt0 <dbl>, vegDidnt1 <dbl>,
## # vegDidnt2 <dbl>, vegDidnt3 <dbl>, vegDidnt4 <dbl>, and abbreviated variable
## # names ¹vegAteMeatOnly0, ²vegAteMeatOnly1, ³vegAteMeatOnly2,
## # ⁴vegAteMeatOnly3, ⁵vegAteMeatOnly4, ⁶vegAteFishOnly0, ⁷vegAteFishOnly1, …
<-ate%>%
ate.plotgather(v, value,vegAteMeatOnly0:vegDidnt4, na.rm = T)%>%
group_by(v)%>%count(value)
as_tibble(ate.plot)
## # A tibble: 20 × 3
## v value n
## <chr> <dbl> <int>
## 1 vegAteFishOnly0 1 214
## 2 vegAteFishOnly1 1 312
## 3 vegAteFishOnly2 1 236
## 4 vegAteFishOnly3 1 255
## 5 vegAteFishOnly4 1 269
## 6 vegAteMeatAndFish0 1 19
## 7 vegAteMeatAndFish1 1 20
## 8 vegAteMeatAndFish2 1 12
## 9 vegAteMeatAndFish3 1 14
## 10 vegAteMeatAndFish4 1 19
## 11 vegAteMeatOnly0 1 76
## 12 vegAteMeatOnly1 1 75
## 13 vegAteMeatOnly2 1 71
## 14 vegAteMeatOnly3 1 100
## 15 vegAteMeatOnly4 1 86
## 16 vegDidnt0 1 2206
## 17 vegDidnt1 1 3578
## 18 vegDidnt2 1 2922
## 19 vegDidnt3 1 3681
## 20 vegDidnt4 1 3509
=ate.plot%>%separate(v, into=c("group", "TP"), sep= -1, convert=T)%>%select(-value)
ate.plotas.data.frame(ate.plot)
## group TP n
## 1 vegAteFishOnly 0 214
## 2 vegAteFishOnly 1 312
## 3 vegAteFishOnly 2 236
## 4 vegAteFishOnly 3 255
## 5 vegAteFishOnly 4 269
## 6 vegAteMeatAndFish 0 19
## 7 vegAteMeatAndFish 1 20
## 8 vegAteMeatAndFish 2 12
## 9 vegAteMeatAndFish 3 14
## 10 vegAteMeatAndFish 4 19
## 11 vegAteMeatOnly 0 76
## 12 vegAteMeatOnly 1 75
## 13 vegAteMeatOnly 2 71
## 14 vegAteMeatOnly 3 100
## 15 vegAteMeatOnly 4 86
## 16 vegDidnt 0 2206
## 17 vegDidnt 1 3578
## 18 vegDidnt 2 2922
## 19 vegDidnt 3 3681
## 20 vegDidnt 4 3509
$group=mapvalues(ate.plot$group,
ate.plotfrom=unique(ate.plot$group),
to=c("Ate fish", "Ate meat and fish",
"Ate meat", "Did not eat meat or fish"))
=0
propfor (i in 0:4){
=ate.plot[ate.plot$TP==i,]
y+1]=sum(y$n[1:3])/y$n[4]
prop[i+1]=round(prop[i+1]*100, digits = 2)
prop[i
}
=ate.plot%>%
ate.plotmutate_when(group=="Did not eat meat or fish", list(value=prop))
=ggplot(data=ate.plot, aes(x=TP, y=n, fill=group)) +
plotgeom_bar(position="stack", stat="identity", width=0.5)+
theme_bw()+
ggtitle("Self-identified 'vegetarian/vegan' who indicated meat or fish eating
on the same 24HR survey instance")+
scale_fill_manual(values=c("blue","red",
"green", "grey"))+
xlab("Instance of 24 hour recall")+
ylim(0,4200)+
geom_text(aes(label = ifelse(is.na(value), "",
paste(value, "%",sep="")), vjust=-3))+
labs(fill="Vegetarian/vegan who:")+
ylab("Number of participants")
plot
#write plot to tiff
# tiff(filename="veg.atemeatorfish.tiff", width=6, height=4, units='in', res=600)
# plot
# dev.off()
Veg5yr1 is the vegetarian variable used in the main analysis. Again the criteria are:
Self-identified as a vegetarian or vegan the first time they were asked on a 24HR survey
Didn’t indicate eating any meat or fish on that same 24HR survey
Indicated no major dietary changes over past 5 years
No meat or fish eating indicated on initial assessment
$Veg5yr1<-0
new$Veg5yr1[new$DietChanges5yr==0 &
new$IAveg==TRUE &
new$nextSpecialVeg==1 &
new$nextMeat24==0 &
new$nextFish24==0]=1
new$Veg5yr1[is.na(new$times_taken_24HR)]=NA
new
table(new$Veg5yr1) #3207
##
## 0 1 <NA>
## 207762 3205 291446
=new%>%select(Veg5yr1, DietChanges5yr, IAveg, nextSpecialVeg, nextMeat24, nextFish24, SpecialTimesVeg)%>%filter(SpecialTimesVeg>=1)
dqnrow(dq) #people who said they were vegetarian at least once
## [1] 9115
$nextMeatFish24=0
dq$nextMeatFish24[dq$nextMeat24==1 | dq$nextFish24==1]=1
dq
=as.data.frame(dq%>%group_by(nextSpecialVeg, nextMeatFish24,IAveg, DietChanges5yr)%>%summarize(n())) dq1
## `summarise()` has grouped output by 'nextSpecialVeg', 'nextMeatFish24', 'IAveg'. You can
## override using the `.groups` argument.
$nextSpecialVeg=mapvalues(dq1$nextSpecialVeg, from=c(0,1),
dq1to=c("No", "Yes"))
$IAveg=mapvalues(dq1$IAveg, from=c("TRUE", "FALSE"),
dq1to=c("No", "Yes")) #reversing the point of this column from "were they veg" to "did they eat meat"
$nextMeatFish24=mapvalues(dq1$nextMeatFish24, from=c(0,1),
dq1to=c("No", "Yes"))
$DietChanges5yr=mapvalues(dq1$DietChanges5yr, from=c(0,1),
dq1to=c("No", "Yes"))
colnames(dq1)=c("Veg. on first 24HR taken",
"Ate meat/fish on first 24HR taken",
"Ate meat/fish on initial assessment",
"Major dietary changes past 5 years",
"N")
=unlist(dq1%>%filter(`Veg. on first 24HR taken`=="No")%>%summarize(sum(N)))
novegfirst
#https://haozhu233.github.io/kableExtra/awesome_table_in_html.html
%>%filter(`Veg. on first 24HR taken`=="Yes")%>%
dq1add_row(`Veg. on first 24HR taken`="No",
`Ate meat/fish on first 24HR taken`="",
`Ate meat/fish on initial assessment`="",
`Major dietary changes past 5 years`="",
N=novegfirst)%>%
arrange(desc(`Veg. on first 24HR taken`),
`Ate meat/fish on initial assessment`)%>%
kbl(align="c", caption=paste("Survey results for", nrow(dq),"participants who self-identified as vegetarians at least once", sep=" ")) %>%
kable_classic_2(full_width = F)%>%
row_spec(1, background="#36E072")
Veg. on first 24HR taken | Ate meat/fish on first 24HR taken | Ate meat/fish on initial assessment | Major dietary changes past 5 years | N |
---|---|---|---|---|
Yes | No | No | No | 3205 |
Yes | No | No | Yes | 1136 |
Yes | Yes | No | No | 11 |
Yes | Yes | No | Yes | 11 |
Yes | No | Yes | No | 1628 |
Yes | No | Yes | Yes | 932 |
Yes | Yes | Yes | No | 490 |
Yes | Yes | Yes | Yes | 369 |
Yes | No | NA | NA | 6 |
No | 1327 |
# Write output ------------------------------------------
write.csv(new, "DefineVeg-09062022.csv", row.names=F, quote=F)
=new%>%filter(SpecialPropVeg>0)
vegonlywrite.csv(vegonly, "Vegonly-09062022.csv", row.names=F, quote=F)
This seemed unnecessarily complicated, so I verified with UKB and they confirmed:
“If a day is recorded in field 20080-x.0, but a null value is given for field 20086-x.0, then it means that the participant did not follow any of the six possible special diets during that period. Each diet that they did follow is recorded sequentially in fields 20086-x.0 through 20086-x.5 (i.e. the last is only populated if all diets were selected). No entry in field 20080-x.0 indicates the participant did not complete instance x of the dietary questionnaire (and all of 20086-x.0 through 20086-x.5 will be empty).”
–Sean Watson, UK Biobank data analyst, 8/31/2022↩︎