Harvest the pheno_data component from the columns of a summary stats MatrixTable, allowing for information on multiple populations within that component
Source:R/pheno_col_ops.R
multipop_df.Rd
Harvest the pheno_data component from the columns of a summary stats MatrixTable, allowing for information on multiple populations within that component
Examples
# the following examples are too time-consuming to run routinely but may be of interest
# if (nchar(Sys.getenv("HAIL_UKBB_SUMSTAT_10K_PATH"))>0) {
# hl = hail_init()
# ss = get_ukbb_sumstat_10kloci_mt(hl)
# sscol = ss$cols()$collect() # may take a bit of time
# print(length(sscol))
# multipop_df(sscol[[1]])
# }
#
# \donttest{
# if (nchar(Sys.getenv("HAIL_UKBB_SUMSTAT_10K_PATH"))>0) {
# # to get an overview of all phenotype-cohort combinations in a searchable table
# mmm = lapply(sscol, multipop_df )
# mymy = do.call(rbind, mmm) # over 16k rows
# DT::datatable(mymy)
# }
#
# }
# This example runs quickly and is demonstrative: it reads the small
# "myss2" MatrixTable archive shipped in BiocHail's extdata folder.
hl <- hail_init()
# mustWork = TRUE stops right away with a clear error if the archive is
# not found, instead of passing an empty path on to unzip()
litzip <- system.file("extdata", "myss2.zip",
  package = "BiocHail",
  mustWork = TRUE
)
td <- tempdir()
unzip(litzip, exdir = td)
# build the path to the unzipped table with file.path() rather than
# pasting a separator by hand
ntab <- hl$read_matrix_table(file.path(td, "myss2.mt"))
ntab$describe()
# collect the column-level values; the result is indexed with [[ below
nt2 <- ntab$col$collect()
multipop_df(nt2[[1]]) # must select one element
#> trait_type phenocode description modifier coding_description coding n_cases
#> 1 biomarkers 30600 Albumin irnt NA 5759
#> 2 biomarkers 30600 Albumin irnt NA 856
#> 3 biomarkers 30600 Albumin irnt NA 7694
#> 4 biomarkers 30600 Albumin irnt NA 2340
#> 5 biomarkers 30600 Albumin irnt NA 367192
#> 6 biomarkers 30600 Albumin irnt NA 1364
#> n_controls heritability pop
#> 1 NA 0.25412190 AFR
#> 2 NA 0.11268439 AMR
#> 3 NA 0.24110706 CSA
#> 4 NA 0.06126386 EAS
#> 5 NA 0.06449071 EUR
#> 6 NA 0.20458513 MID