---
title: Genodata module
keywords: fastai
sidebar: home_sidebar
summary: "read and extract genodata"
description: "read and extract genodata"
nb_path: "00_Genodata.ipynb"
---
# Absolute location of a .bed file under the exome `plink/` directory.
# NOTE(review): "c1" in the file name presumably means chromosome 1 — confirm.
geno_path = (
    "/home/dmc2245/UKBiobank/data/exome_files/project_VCF/072721_run/plink/"
    "ukb23156_c1.merged.filtered.bed"
)
/home/dmc2245/UKBiobank/data/exome_files/project_VCF/072721_run/plink/092321_UKBB_qc_exome_geno_path.txt
# ---------------------------------------------------------------------------
# Example inputs: every data file lives under `MWE_region_extraction/`.
# These names are the arguments of the (commented-out) `main(...)` call at the
# end of this block, in the same order.
# ---------------------------------------------------------------------------

# Three integers; the last one is spelled as a subtraction and evaluates to
# 313528, i.e. 900000 less than the 1213528 embedded in the file names below.
# NOTE(review): presumably [chromosome, start, end] — confirm against main().
region = [5, 272741, 1213528 - 900000]

# Genotype (.bed) file and its matching summary statistics.
geno_path = (
    "MWE_region_extraction/"
    "ukb23156_c5.merged.filtered.5_272741_1213528.bed"
)
sumstats_path = (
    "MWE_region_extraction/"
    "090321_UKBB_Hearing_aid_f3393_expandedwhite_"
    "6436cases_96601ctrl_PC1_2_f3393.regenie.snp_stats"
)

# No phenotype file is supplied in this example.
pheno_path = None

# NOTE(review): judging by the file name this is a list of unrelated samples —
# confirm how main() consumes it.
unr_path = (
    "MWE_region_extraction/"
    "UKB_genotypedatadownloaded083019."
    "090221_sample_variant_qc_final_callrate90."
    "filtered.extracted.white_europeans.filtered."
    "092821_ldprun_unrelated.filtered.prune.txt"
)

# Second genotype source (.bgen) with its own summary statistics and the
# reference label passed alongside it.
imp_geno_path = (
    "MWE_region_extraction/"
    "ukb_imp_chr5_v3_05_272856_1213643.bgen"
)
imp_sumstats_path = (
    "MWE_region_extraction/"
    "100521_UKBB_Hearing_aid_f3393_expandedwhite_"
    "15601cases_237318ctrl_500k_PC1_PC2_f3393.regenie.snp_stats"
)
imp_ref = "hg19"

# Output file names.
output_sumstats = "test.snp_stats"
output_LD = "test_corr.csv"

#main(region,geno_path,sumstats_path,pheno_path,unr_path,imp_geno_path,imp_sumstats_path,imp_ref,output_sumstats,output_LD)
from pandas_plink import Chunk
# Build a pandas_plink Chunk from two positional 512 values. The result is not
# bound to a name, so in a plain script this statement only constructs the
# object and discards it.
# NOTE(review): the two arguments are presumably the per-chunk sample and
# variant counts — confirm against the pandas_plink documentation.
Chunk(512,512)
# `exome_geno` is not defined in this part of the file; it must already exist
# when these lines run.
# Pass the first 50 entries of `exome_geno.bim.snp` to `extractbyvariants`.
exome_geno.extractbyvariants(exome_geno.bim.snp[:50])
# Pass the first 60 entries of `exome_geno.fam.iid` to `extractbysamples`.
# NOTE(review): whether these calls modify `exome_geno` in place or return a
# new object is not visible here — verify before relying on the call order.
exome_geno.extractbysamples(exome_geno.fam.iid[:60])