get_snpset()

get_snpset(
  df1,
  df2,
  thresh = 1e-08,
  protein_coding_only = TRUE,
  region_size = 1e+06,
  verbose = NULL,
  show_full_output = FALSE,
  build = 38
)

Arguments

df1

The dataframe to extract the top snps from (with p-value below thresh)

df2

The dataframe in which to search for overlapping SNPs from df1

thresh

A number. P-value threshold; only variants with p-values below this threshold are extracted (1e-08 by default)

protein_coding_only

Logical (default: TRUE). Set to TRUE to use only protein_coding genes for annotation

region_size

An integer (default = 1000000), or a string such as 100kb or 1MB, indicating the window size for variant labeling. Increase this number for sparser annotation and decrease it for denser annotation.

verbose

Logical (default: NULL). Set to TRUE to get information on which alleles are matched and which are not.

show_full_output

A logical scalar (default: FALSE). Set to TRUE to show the full output from this function.

build

A string, genome build, choose between builds 37 (GRCh37) and 38 (GRCh38) (default is 38)

Value

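A list of dataframes describing the overlapping SNPs (snpset). As shown in the example output, the list contains the elements matched, snp_not_found_in_df2 and snp_found_different_alleles_in_df2.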

Examples

# \donttest{
CD_UKBB_index_snps <-get_lead_snps(CD_UKBB)
get_snpset(CD_UKBB_index_snps, CD_FINNGEN)
#> $matched
#>   CHROM      POS REF1 ALT1          P1        E1 REF2 ALT2          P2       E2
#> 1     5 40439961    C    T 7.42668e-11 0.1818381    C    T 2.80350e-13 0.187984
#> 2    16 50485831    A    G 8.18444e-16 0.5591071    A    G 8.06046e-08 0.426680
#> 3    16 50729867    G   GC 7.36933e-24 0.7618894    G   GC 1.49882e-09 0.530875
#> 4     1 67216513    A    G 8.03684e-20 0.2517527    A    G 1.04321e-08 0.146556
#>           ID Gene_Symbol        biotype
#> 1  rs7713270       TTC33 protein_coding
#> 2 rs76176364        NKD1 protein_coding
#> 3  rs2066847        NOD2 protein_coding
#> 4 rs11576518    C1orf141 protein_coding
#> 
#> $snp_not_found_in_df2
#>   CHROM       POS REF1 ALT1          P1         E1      ID_tmp
#> 1     2 233237298    A    C 1.70216e-09  0.1628839 2_233237298
#> 2     6  31660620    T    A 1.47082e-24  0.8893100  6_31660620
#> 3     6  32708532    A    C 1.27821e-15  0.5884587  6_32708532
#> 4     7  50274703    T    G 8.52335e-14  0.2077781  7_50274703
#> 5     9   4984530    G    C 5.03745e-11 -0.1833112   9_4984530
#> 
#> $snp_found_different_alleles_in_df2
#>  [1] CHROM  POS    REF1   ALT1   P1     E1     REF2   ALT2   P2     E2    
#> [11] ID     ID_tmp
#> <0 rows> (or 0-length row.names)
#> 
# }