Skip to contents

Harvest the pheno_data component from the columns of a summary stats MatrixTable, allowing for information on multiple populations within that component

Usage

multipop_df(
  x,
  top2get = c("trait_type", "phenocode", "description", "modifier", "coding_description",
    "coding"),
  pheno2get = c("n_cases", "n_controls", "heritability", "pop")
)

Arguments

x

Struct - a single element of the list returned by mt$cols()$collect()

top2get

character() vector of general fields to retrieve

pheno2get

character() vector of fields to be retrieved for each subpopulation

Value

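data.frame with the top2get fields followed by the pheno2get fields as columns; as the example below shows, there is one row per population recorded in pheno_data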

Examples

# The following examples are too time-consuming to run routinely, but may be of interest
# if (nchar(Sys.getenv("HAIL_UKBB_SUMSTAT_10K_PATH"))>0) {
#  hl = hail_init()
#  ss = get_ukbb_sumstat_10kloci_mt(hl)
#  sscol = ss$cols()$collect() # may take a bit of time
#  print(length(sscol))
#  multipop_df(sscol[[1]])
# }
#
# \donttest{
# if (nchar(Sys.getenv("HAIL_UKBB_SUMSTAT_10K_PATH"))>0) {
# # to get an overview of all phenotype-cohort combinations in a searchable table
# # (uses sscol as computed in the commented block above)
# mmm = lapply(sscol, multipop_df )
# mymy = do.call(rbind, mmm) # over 16k rows
# DT::datatable(mymy)
# }
#
# }
# this runs quickly and is demonstrative
hl <- hail_init()
litzip <- system.file("extdata", "myss2.zip", package = "BiocHail")
td <- tempdir()
unzip(litzip, exdir = td)
ntab <- hl$read_matrix_table(paste0(td, "/myss2.mt"))
ntab$describe()
nt2 <- ntab$col$collect()
multipop_df(nt2[[1]]) # must select one element
#>   trait_type phenocode description modifier coding_description coding n_cases
#> 1 biomarkers     30600     Albumin     irnt                 NA           5759
#> 2 biomarkers     30600     Albumin     irnt                 NA            856
#> 3 biomarkers     30600     Albumin     irnt                 NA           7694
#> 4 biomarkers     30600     Albumin     irnt                 NA           2340
#> 5 biomarkers     30600     Albumin     irnt                 NA         367192
#> 6 biomarkers     30600     Albumin     irnt                 NA           1364
#>   n_controls heritability pop
#> 1         NA   0.25412190 AFR
#> 2         NA   0.11268439 AMR
#> 3         NA   0.24110706 CSA
#> 4         NA   0.06126386 EAS
#> 5         NA   0.06449071 EUR
#> 6         NA   0.20458513 MID