# VarWiz / app / logic / create_sankey.R
# vojtam's picture
# Upload 18 files
# 8e7c9f6 verified
box::use(
data.table[...],
stats[na.omit],
plotly[plot_ly, layout, add_annotations],
)
#' Prepare node labels and link ids for the pathway -> gene -> variant Sankey
#'
#' @description
#' Filters `dataset` by the selected pathways or by the selected genes, gives
#' every distinct pathway, gene and variant name a numeric node id, and
#' attaches those ids to each pathway / gene / variant row so that a plotly
#' Sankey diagram can be built from the result.
#'
#' @param dataset A data.table coming from the preprocess_data function. Only
#'   the columns `gene_name`, `kegg_paths_name`, `var_name` and `col` are read
#'   here (`col` is carried through untouched; presumably a link colour --
#'   confirm against the caller).
#' @param selected_pathways A table of selected pathways, used when
#'   `is_pathway` is `TRUE`; only its `pathway` column is read. Example:
#'
#'   pathway     | row_i
#'   Endocytosis | 5
#' @param selected_genes A table of selected genes, used when `is_pathway` is
#'   `FALSE`; only its `gene_name` column is read.
#' @param is_pathway Single logical. `TRUE` filters `dataset` on
#'   `kegg_paths_name`, `FALSE` filters it on `gene_name`.
#'
#' @return An unnamed list of three elements, in this order:
#'   1. `labels`: data.table with the columns `label` and `id`, one row per
#'      distinct label, e.g. `COVID-19 | 1`. Ids follow first appearance in
#'      the concatenation of pathway names, gene names and variant names.
#'   2. `full`: data.table with the columns `gene_name`, `kegg_paths_name`,
#'      `var_name`, `col`, `first`, `second` and `third`, where `first`,
#'      `second` and `third` are the ids of the row's pathway, gene and
#'      variant label.
#'   3. `scores`: numeric vector of ones (every link gets the same weight).
#' @export
sankey_prepare_data <- function(dataset, selected_pathways, selected_genes, is_pathway) {
  # Keep only the rows of interest and the four columns the Sankey needs.
  sankey_data <- if (is_pathway) {
    dataset[
      kegg_paths_name %in% selected_pathways$pathway,
      .(gene_name, kegg_paths_name, var_name, col)
    ]
  } else {
    dataset[
      gene_name %in% selected_genes$gene_name,
      .(gene_name, kegg_paths_name, var_name, col)
    ]
  }
  setorder(sankey_data, kegg_paths_name, gene_name)

  # One label per node: pathways first, then genes, then variants. The id of a
  # label is its position among the distinct labels.
  labels_all <- data.table(
    label = c(
      sankey_data$kegg_paths_name,
      sankey_data$gene_name,
      sankey_data$var_name
    )
  )
  labels_all[, id := match(label, unique(label))]
  labels <- unique(labels_all)

  # Attach the pathway, gene and variant ids in turn. Each step is an
  # `x[labels]` join, so it returns one row per label; labels that do not occur
  # in the joined column produce NA rows, which na.omit() removes below.
  # The id columns arrive as "id", "i.id" and "i.id.1".
  with_source <- sankey_data[labels, on = c(kegg_paths_name = "label")]
  with_source <- with_source[labels, on = c(gene_name = "label")]
  with_source <- with_source[labels, on = c(var_name = "label")]
  setnames(
    with_source,
    old = c("id", "i.id", "i.id.1"),
    new = c("first", "second", "third")
  )
  # na.omit() drops every row with an NA in any column, `col` included.
  full <- unique(na.omit(with_source))

  # Every link has weight 1. The vector holds one entry per run of consecutive
  # equal `first + second` values plus one entry per row of `full`.
  # NOTE(review): the run count can differ from the number of distinct
  # pathway-gene pairs (non-adjacent repeats, or different pairs with the same
  # sum); the length is kept as is for backward compatibility with callers.
  n_pair_runs <- uniqueN(rleid(full$first + full$second))
  scores <- rep(1, n_pair_runs + nrow(full))

  list(labels, full, scores)
}
#' Build the pathway -> gene -> variant Sankey figure
#'
#' @description
#' Creates a horizontal plotly Sankey trace. The links of `path_gene` are
#' listed first, followed by the links of `gene_variant`. Variant nodes are
#' drawn without a visible label; their full label is still shown on hover.
#'
#' @param labels Table with a `label` column, one row per node. The last
#'   `length(unique(gene_variant$var_name))` rows are treated as the variant
#'   nodes.
#' @param path_gene Table with the columns `first` and `second`: source and
#'   target node ids of the pathway -> gene links.
#' @param gene_variant Table with the columns `var_name`, `second` and
#'   `third`: `second` / `third` are the source and target node ids of the
#'   gene -> variant links.
#' @param scores Link values, passed to plotly unchanged.
#' @param colors Link colours, passed to plotly unchanged.
#'
#' @details
#' Node ids are shifted by -1 before they are handed to plotly, i.e. they are
#' expected to be 1-based row positions in `labels`.
#'
#' @return The plotly figure.
#' @export
create_sankey <- function(labels, path_gene, gene_variant, scores, colors) {
  n_nodes <- nrow(labels)
  # Never blank more rows than there are nodes.
  n_variants <- min(length(unique(gene_variant$var_name)), n_nodes)
  # seq_len() yields an empty index when there are no variants, whereas
  # `(n_nodes - n_variants + 1):n_nodes` would count backwards and blank the
  # last real node.
  variant_rows <- seq_len(n_variants) + (n_nodes - n_variants)

  # Work on a plain vector so `labels` (and the hover text below) stay intact.
  visible_labels <- labels$label
  visible_labels[variant_rows] <- ""

  fig <- plot_ly(
    type = "sankey",
    orientation = "h",
    selectedpoints = c(0:10),
    node = list(
      label = visible_labels,
      y = seq(0, nrow(labels), by = 1),
      color = "black",
      pad = 30,
      # The full labels (variants included) are shown on hover.
      customdata = labels$label,
      hovertemplate = "%{customdata}",
      thickness = 30,
      line = list(
        color = "black",
        width = 2
      )
    ),
    link = list(
      # Shift the ids by -1 for plotly.
      source = c(path_gene$first, gene_variant$second) - 1,
      target = c(path_gene$second, gene_variant$third) - 1,
      value = scores,
      color = colors
    )
  )

  # Column captions below the diagram, placed at x = 0, 1 and 2.
  column_titles <- c("Pathway", "Gene", "Variant")
  for (i in seq_along(column_titles)) {
    fig <- add_annotations(
      fig,
      x = i - 1,
      y = -0.1,
      showarrow = FALSE,
      font = list(size = 17),
      xref = "x",
      yref = "paper",
      text = column_titles[[i]]
    )
  }

  # Hide the cartesian axes; they only exist to position the captions.
  hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, visible = FALSE)
  fig <- layout(
    fig,
    font = list(size = 14),
    xaxis = hidden_axis,
    yaxis = hidden_axis
  )
  fig
}