| import rpy2.robjects as ro |
| from rpy2.robjects import pandas2ri |
| from rpy2.robjects.conversion import localconverter |
| from rpy2.robjects.packages import importr |
|
|
# Switch on rpy2's pandas conversion for the whole process, so code outside
# get_r_matrix() also gets pandas <-> R conversion without a local converter.
# NOTE(review): newer rpy2 releases appear to deprecate activate() in favour
# of explicit conversion contexts -- confirm against the pinned rpy2 version.
| pandas2ri.activate() |
|
|
| |
# Module-level handles to the R packages, loaded through rpy2's importr().
# For igraph, the R symbol `.env` is explicitly mapped to the Python name
# `_env_` via robject_translations.
# NOTE(review): presumably importr() fails at import time when a package is
# not installed in the R library -- confirm; that would happen before the
# install.packages() fallbacks inside the embedded R functions can run.
| stats = importr("stats") |
| ape = importr("ape") |
| igraph = importr("igraph", robject_translations={".env": "_env_"}) |
| openxlsx = importr("openxlsx") |
|
|
|
|
def get_r_matrix(df):
    """Convert ``df`` into its R representation.

    The conversion runs inside a local rpy2 converter context that combines
    the default converter with the pandas converter, and the result of
    ``py2rpy`` is returned unchanged.

    Args:
        df: Object to convert (presumably a pandas DataFrame -- verify
            against callers).

    Returns:
        The R-side object produced by ``ro.conversion.py2rpy``.
    """
    pandas_aware_converter = ro.default_converter + pandas2ri.converter
    with localconverter(pandas_aware_converter):
        return ro.conversion.py2rpy(df)
|
|
|
|
# R function `convert_to_newick(tm_matrix, output_file)`, evaluated once at
# import time; the resulting R function object is kept as a Python callable.
# It clusters the rows of `tm_matrix` hierarchically, writes the tree to
# `output_file` in Newick format and returns the Newick string.
export_matrix_to_newick_r = ro.r(
    """
    convert_to_newick <- function(tm_matrix, output_file) {
      # Attach ape, installing it from CRAN first when it is missing.
      if (!require(ape, quietly = TRUE)) {
        install.packages("ape", repos = "https://cran.r-project.org")
        library(ape)
      }

      # Pairwise distances between the rows of the input matrix.
      dist_matrix <- dist(tm_matrix)

      # Hierarchical clustering with Ward's criterion (ward.D2).
      hclust_tree <- hclust(dist_matrix, method = "ward.D2")

      # Convert the dendrogram into an ape phylo object.
      phylo_tree <- as.phylo(hclust_tree)

      # Serialise the tree once and reuse the string for the file, instead
      # of running write.tree() twice. As before, an empty output_file
      # means nothing is written to disk.
      newick_str <- write.tree(phylo_tree)
      if (nzchar(output_file)) {
        writeLines(newick_str, output_file)
      }

      return(newick_str)
    }
    """
)
|
|
# R function `create_similarity_network_r(threshold, tm_matrix, excel_path)`,
# evaluated once at import time; the resulting R function object is kept as a
# Python callable. It builds a graph whose edges are the off-diagonal entries
# of `tm_matrix` that reach `threshold`, clusters it with the fast-greedy
# algorithm, writes an edge sheet (plus singleton nodes) to `excel_path`, and
# returns list(cluster_data = <data.frame protein/cluster_name>, graph = <igraph>).
export_similarity_network_r = ro.r(
    """
    create_similarity_network_r <- function(threshold, tm_matrix, excel_path) {
      # Attach the required packages, installing from CRAN when missing.
      if (!require(igraph, quietly = TRUE)) {
        install.packages("igraph", repos = "https://cran.r-project.org")
        library(igraph)
      }
      if (!require(openxlsx, quietly = TRUE)) {
        install.packages("openxlsx", repos = "https://cran.r-project.org")
        library(openxlsx)
      }

      # (row, col) index pairs whose value reaches the threshold, without
      # self-loops. drop = FALSE keeps a two-column matrix even when zero or
      # one pair survives; otherwise a single pair collapses to a vector and
      # nrow() becomes NULL.
      overthresholdedges <- which(tm_matrix >= threshold, arr.ind = TRUE)
      is_off_diagonal <- overthresholdedges[, 1] != overthresholdedges[, 2]
      overthresholdedges <- overthresholdedges[is_off_diagonal, , drop = FALSE]

      # Start from an empty graph and add one named vertex per matrix row.
      # NOTE(review): graph.empty / as.undirected / fastgreedy.community /
      # get.edgelist are legacy igraph names; confirm the installed igraph
      # version before switching to the newer spellings.
      graph <- graph.empty()
      nodes <- rownames(tm_matrix)
      graph <- add_vertices(graph, nv = length(nodes), name = nodes)

      # Add every edge in a single call. t() interleaves the pairs as
      # from1, to1, from2, to2, ..., i.e. the same order the former
      # row-by-row loop used. The guard avoids the 1:0 iteration that used
      # to fail when no pair passed the threshold.
      if (nrow(overthresholdedges) > 0) {
        graph <- add_edges(graph, as.vector(t(overthresholdedges)))
      }

      # Collapse to an undirected graph (merges i->j / j->i duplicates).
      graph <- as.undirected(graph, mode = "collapse")

      # Community detection and per-community sizes.
      clusters <- fastgreedy.community(graph)
      cluster_sizes <- sizes(clusters)

      # Communities ordered from largest to smallest.
      sorted_clusters <- clusters[order(cluster_sizes, decreasing = TRUE)]

      # Community id of every vertex.
      cluster_members <- membership(clusters)

      # Vertices that sit alone in a community of size one.
      singleton_nodes <- names(cluster_members[cluster_members %in% which(sizes(clusters) == 1)])

      # Workbook intended for Cytoscape import, with an "Edges" sheet.
      cytoscape_export <- createWorkbook()
      addWorksheet(cytoscape_export, sheetName = "Edges")
      writeData(cytoscape_export, sheet = "Edges", x = "Source", startCol = 1, startRow = 1)
      writeData(cytoscape_export, sheet = "Edges", x = "Target", startCol = 2, startRow = 1)

      # Edge list of the undirected graph, written below the header row.
      edges <- get.edgelist(graph)
      if (nrow(edges) > 0) {
        writeData(cytoscape_export, sheet = "Edges", x = V(graph)[edges[, 1]]$name, startCol = 1, startRow = 2)
        writeData(cytoscape_export, sheet = "Edges", x = V(graph)[edges[, 2]]$name, startCol = 2, startRow = 2)
      }

      # Last sheet row used so far (header + edges); singleton nodes are
      # appended directly underneath, in the Source column only.
      last_edge_row <- nrow(edges) + 1
      if (length(singleton_nodes) > 0) {
        writeData(cytoscape_export, sheet = "Edges", x = singleton_nodes, startCol = 1, startRow = last_edge_row + 1)
      }

      saveWorkbook(cytoscape_export, excel_path, overwrite = TRUE)

      # One row per protein, in sorted-cluster order. The cluster name is
      # the 1-based rank of the cluster in sorted_clusters, except that
      # singleton proteins are labelled "singleton". Built in one
      # vectorised step instead of growing a data frame with rbind().
      cluster_rank <- rep(seq_along(sorted_clusters), times = as.integer(lengths(sorted_clusters)))
      proteins <- as.character(unlist(sorted_clusters, use.names = FALSE))
      cluster_labels <- as.character(cluster_rank)
      cluster_labels[proteins %in% singleton_nodes] <- "singleton"
      export_clusters <- data.frame(
        protein = proteins,
        cluster_name = cluster_labels,
        stringsAsFactors = FALSE
      )

      # Cluster table plus the graph object.
      return(list(cluster_data = export_clusters, graph = graph))
    }
    """
)
|
|