GWAS was performed in Regenie v3.1.2. Regenie is optimized for use with traits that have imbalanced case:control ratio (our veg:non-veg was approx 1:75).
https://github.com/rgcgithub/regenie
https://rgcgithub.github.io/regenie/
I followed their UKB recommendation settings: https://rgcgithub.github.io/regenie/recommendations/
Step 0 prepares a single genotype file of high quality variants from the UKB variant call files (non-imputed) for use in Step 1 (whole genome regression).
#!/bin/bash
#SBATCH --partition=highmem_p
#SBATCH --job-name=genoQC-combine
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=16
#SBATCH --time=144:00:00
#SBATCH --mem=190000
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err
#---#SBATCH --array=1
cd /work/kylab/mike/BioxVeg/genotypeQC
#---------
#Set which
#steps run
#---------
step1=F
#Cancel array jobs below here-----
step2=F
step3=true
#---------
if [ $step1 = true ]; then
ml PLINK/2.00-alpha2.3-x86_64-20210920-dev
i=$SLURM_ARRAY_TASK_ID
###-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
###STEP 1. GENOTYPE QC PLINK-=-=-=-=-=-=-=-=
###-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
echo "-=-=-=-=-=-=-=-STEP 1-=-=-=-=-=-=-=-\n\n"
genoindir=("/scratch/mf91122/bgen_v1.2_UKBsource")
mfiscoredir=("/work/kylab/mike/UKB/quality-scores/mfi")
outdir=("/scratch/mf91122/BioxVeg/genotypeQC/chr")
mkdir -p $outdir
plink2 \
$genoindir/ukb_imp_chr"$i"_v3.bgen ref-first \
--bgen $genoindir/ukb_imp_v3.sample \
--sample $mfiscoredir/ukb_mfi_keepsnps_chr"$i".txt \
--extract \
--mind 0.05 \
--geno 0.02 \
--hwe 1e-06 \
--maf 0.01 \
--keep /scratch/mf91122/LipidsxVeg/pheno/LipidsxVeg_phenoQC_IDS.txt \$r:\$a \
--set-missing-var-ids @:#:\
--rm-dup force-first \
--new-id-max-allele-len 414 \
--max-alleles 2 \
--make-pgen "$outdir"/chr"$i"
--out
fi
if [ $step2 = true ]; then
###-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
###STEP 2. COMBINE=-=-=-=-=-=-=-=-=-=-=-=-=-
###-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
calldir=("/scratch/mf91122/BioxVeg/genotypeQC/originalCalls")
outdir=("/scratch/mf91122/BioxVeg/genotypeQC/combineCalls")
mkdir -p $outdir
#Make list
rm -f "$calldir"/merge.txt
for chr in {2..22}; do echo "/scratch/mf91122/BioxVeg/genotypeQC/originalCalls/ukb_cal_chr${chr}_v2.bed /scratch/mf91122/BioxVeg/genotypeQC/originalCalls/ukb_snp_chr${chr}_v2.bim /scratch/mf91122/BioxVeg/genotypeQC/originalCalls/ukb48818_cal_chr1_v2_s488282.fam" >> $calldir/merge.txt; done
ml PLINK/1.9b_6-24-x86_64
plink \
$calldir/ukb_cal_chr1_v2.bed \
--bed $calldir/ukb_snp_chr1_v2.bim \
--bim $calldir/ukb48818_cal_chr1_v2_s488282.fam \
--fam $calldir/merge.txt \
--merge-list \
--make-bed $outdir/ukb_cal_allChrs
--out
fi
if [ $step3 = true ]; then
outdir=("/scratch/mf91122/BioxVeg/genotypeQC/combineCalls")
ml PLINK/2.00-alpha2.3-x86_64-20210920-dev
plink2 \
$outdir/ukb_cal_allChrs \
--bfile --mac 100 --geno 0.1 --hwe 1e-15 \
--maf 0.01 \
--mind 0.1 --write-samples --no-id-header \
--write-snplist $outdir/qc_pass
--out
fi
#!/bin/bash
#SBATCH --partition=highmem_p
#SBATCH --job-name=regenie-step1
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=16
#SBATCH --time=72:00:00
#SBATCH --mem=100000
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err
cd /work/kylab/mike/BioxVeg/regenie/GWAS
ml regenie/3.1.3-conda
phenodir=("/scratch/mf91122/BioxVeg/pheno/regenie")
step1genoindir=("/scratch/mf91122/BioxVeg/genotypeQC/combineCalls")
outdir=("/scratch/mf91122/BioxVeg/regenie/VegGWAS/Step1")
mkdir -p $outdir
regenie \
\
--step 1 $step1genoindir/ukb_cal_allChrs \
--bed $step1genoindir/qc_pass.snplist \
--extract $step1genoindir/qc_pass.id \
--keep $phenodir/phenoveg.txt \
--phenoFile \
--phenoColList Veg5yr1 $phenodir/covar.txt \
--covarFile \
--covarColList Age,PCA1,PCA2,PCA3,PCA4,PCA5,PCA6,PCA7,PCA8,PCA9,PCA10,Townsend \
--catCovarList Sex,Geno_batch,AlcoholFreq,PreviousSmoker,CurrentSmoker \
--bt \
--bsize 1000 \
--lowmem $outdir/regenie_tmp_preds \
--lowmem-prefix $outdir/ukb_step1_Veg2GWAS
--out
sbatch /work/kylab/mike/BioxVeg/regenie/GWAS/step2.sh
#!/bin/bash
#SBATCH --partition=batch
#SBATCH --job-name=regenie-VegGWAS-step2
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=16
#SBATCH --time=72:00:00
#SBATCH --mem=40000
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err
#SBATCH --array=1-22
i=$SLURM_ARRAY_TASK_ID
cd /work/kylab/mike/BioxVeg/regenie/GWAS
ml regenie/3.1.3-conda
phenodir=("/scratch/mf91122/BioxVeg/pheno/regenie")
step1genoindir=("/scratch/mf91122/BioxVeg/genotypeQC/combineCalls")
step1dir=("/scratch/mf91122/BioxVeg/regenie/VegGWAS/Step1")
step2genoindir=("/scratch/mf91122/BioxVeg/genotypeQC/chr")
outdir=("/scratch/mf91122/BioxVeg/regenie/VegGWAS/Step2")
mkdir -p $outdir
regenie \
\
--step 2 $step2genoindir/chr"$i" \
--pgen $phenodir/phenoveg.txt \
--phenoFile \
--phenoColList Veg5yr1 $phenodir/covar.txt \
--covarFile \
--covarColList Age,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,Townsend \
--catCovarList Sex,Geno_batch,AlcoholFreq,PreviousSmoker,CurrentSmoker \
--bt --approx --pThresh 0.01 \
--firth $step1dir/ukb_step1_Veg2GWAS_pred.list \
--pred \
--bsize 400 $outdir/ukb_step2_Veg2GWAS_chr"$i" --out
suppressMessages(library(qqman))
suppressMessages(library(tidyverse))
#devtools::install_github('kaustubhad/fastman')
library(fastman)
<-list(c("#0CEBA8", "#066C4E"))
mancolors
="/scratch/mf91122/BioxVeg/regenie/VegGWAS/Step2"
dir
="Veg5yr1"
p
=list()
chrfor (c in 1:22){
=as_tibble(read_delim(paste(
chr[[c]]"/ukb_step2_Veg2GWAS_chr",c,"_",p,".regenie", sep="")))
dir,
}
=do.call(rbind,chr)
file<-file%>%select(CHROM, ID, GENPOS, LOG10P)
file2$P=10^(-file2$LOG10P)
file2=file2%>%select(-LOG10P)
file2
colnames(file2)=c("CHR", "SNP", "BP", "P")
suppressWarnings(dir.create(paste(dir, "/../plot", sep="")))
=paste(dir, "/../plot/",p,"-regenie-Manhattan.labeled.png", sep="")
plotoutputpath
#Make Manhattan plot P_BOLT_LMM
png(filename=plotoutputpath, type="cairo",
width = 10, height = 5, units = 'in', res = 300)
print(
manhattan(file2,
main = paste("UKB-EUR ",p, "fixed", sep=""),
ylim = c(0, 10),
cex = 0.6,
#annotatePval = 5e-05,
annotateTop = TRUE,
cex.axis = 0.9,
col = mancolors[[1]],
suggestiveline = -log10(1e-05),
genomewideline = -log10(5e-08),
chrlabs = as.character(1:22)
#end manhattan
)dev.off()
);
=paste(dir, "/../plot/", p,"-regenie-fastQQ.png", sep="")
qqoutputpath
png(filename=qqoutputpath, type="cairo", width = 5, height = 5, units = 'in', res = 300)
fastqq(file2$P)
dev.off()