Spaces:

Xue-Jun
/

StructureBasedSimilarityNetwork

Sleeping

App Files Files Community

StructureBasedSimilarityNetwork / r_functions.py

Xue-Jun

Upload 6 files

4a4b152 verified 6 months ago

raw

history blame contribute delete

4.84 kB

	import rpy2.robjects as ro
	from rpy2.robjects import pandas2ri
	from rpy2.robjects.conversion import localconverter
	from rpy2.robjects.packages import importr

	# NOTE(review): this appears to switch on rpy2's pandas conversion for the whole
	# process; get_r_matrix below also applies the pandas converter locally —
	# confirm whether both are needed.
	pandas2ri.activate()

	# Import the required R packages
	stats = importr("stats")
	ape = importr("ape")
	# The R symbol `.env` is mapped to the Python name `_env_` — presumably to work
	# around a name clash when rpy2 translates igraph's symbols; confirm.
	igraph = importr("igraph", robject_translations={".env": "_env_"})
	openxlsx = importr("openxlsx")


	def get_r_matrix(df):
	    """Convert ``df`` into its R representation through rpy2.

	    The conversion runs under a local converter that combines rpy2's default
	    rules with the pandas rules.

	    Args:
	        df: Object handed to ``ro.conversion.py2rpy`` (presumably a pandas
	            DataFrame -- confirm against callers).

	    Returns:
	        Whatever ``ro.conversion.py2rpy`` produces for ``df``.
	    """
	    conversion_rules = ro.default_converter + pandas2ri.converter
	    with localconverter(conversion_rules):
	        return ro.conversion.py2rpy(df)


	# R helper, defined through `ro.r`, that clusters a matrix hierarchically and
	# exports the resulting tree in Newick format.
	#
	# R signature: convert_to_newick(tm_matrix, output_file)
	#   tm_matrix   - object passed straight to R's `dist()`.
	#   output_file - path given to `write.tree(..., file = output_file)`.
	#
	# Steps performed inside R:
	#   1. Load `ape`; if `require` fails, install it from CRAN and load it.
	#   2. `dist(tm_matrix)` -> `hclust(..., method = "ward.D2")` -> `as.phylo(...)`.
	#   3. Write the tree to `output_file`, then call `write.tree` again without a
	#      file to obtain the Newick string, which is returned.
	#
	# NOTE(review): the Python name is presumably bound to the R function object
	# that `ro.r` returns for this definition — confirm against the rpy2 docs.
	# The `#` lines inside the string are R comments; they are part of the string
	# literal and are left untouched.
	export_matrix_to_newick_r = ro.r(
	"""
	convert_to_newick <- function(tm_matrix, output_file) {
	# 导入 ape 包
	if (!require(ape, quietly = TRUE)) {
	install.packages("ape", repos = "https://cran.r-project.org")
	library(ape)
	}
	# 计算距离矩阵
	dist_matrix <- dist(tm_matrix)
	# 层次聚类
	hclust_tree <- hclust(dist_matrix, method = "ward.D2")
	# 转为 phylo 对象
	phylo_tree <- as.phylo(hclust_tree)
	# 导出为 Newick 格式
	write.tree(phylo_tree, file = output_file)
	newick_str <- write.tree(phylo_tree)
	return(newick_str)
	}
	"""
	)

	# R helper, defined through `ro.r`, that builds a thresholded similarity
	# network, clusters it, and exports it for Cytoscape.
	#
	# R signature: create_similarity_network_r(threshold, tm_matrix, excel_path)
	#   threshold  - entries of `tm_matrix` that are >= this value become edges.
	#   tm_matrix  - similarity table; its row names are used as node names.
	#   excel_path - path of the .xlsx workbook written with `saveWorkbook`
	#                (an existing file is overwritten).
	#
	# Returns an R list with:
	#   cluster_data - data.frame with columns `protein` and `cluster_name`; nodes
	#                  in size-1 communities are labelled "singleton", all others
	#                  get the rank of their community when sorted by size.
	#   graph        - the undirected igraph object.
	#
	# Fixes relative to the previous version of the embedded R code:
	#   * The self-loop filter uses `drop = FALSE`, so a single remaining edge stays
	#     a one-row matrix instead of collapsing to a vector (which made `nrow()`
	#     return NULL and the edge loop fail).
	#   * Edges are added in one guarded `add_edges` call instead of a
	#     `for (i in 1:nrow(...))` loop, which iterated over c(1, 0) when no edge
	#     passed the threshold and copied the graph once per edge.
	export_similarity_network_r = ro.r(
	    """
	    create_similarity_network_r <- function(threshold, tm_matrix, excel_path) {
	        # Load the required packages, installing them from CRAN if missing
	        if (!require(igraph, quietly = TRUE)) {
	            install.packages("igraph", repos = "https://cran.r-project.org")
	            library(igraph)
	        }
	        if (!require(openxlsx, quietly = TRUE)) {
	            install.packages("openxlsx", repos = "https://cran.r-project.org")
	            library(openxlsx)
	        }
	        # Build the edge list from the similarity threshold and filter out self-loops.
	        # drop = FALSE keeps a two-column matrix even when only one edge is left.
	        overthresholdedges <- which(tm_matrix >= threshold, arr.ind = TRUE)
	        overthresholdedges <- overthresholdedges[overthresholdedges[, 1] != overthresholdedges[, 2], , drop = FALSE]
	        # Create an empty graph object
	        graph <- graph.empty()
	        # Add the nodes
	        nodes <- rownames(tm_matrix)
	        graph <- add_vertices(graph, nv = length(nodes), name = nodes)
	        # Add all edges at once; t() interleaves the pairs as from1, to1, from2, to2, ...
	        if (nrow(overthresholdedges) > 0) {
	            graph <- add_edges(graph, as.vector(t(overthresholdedges)))
	        }
	        # Convert to an undirected graph, collapsing duplicate edges
	        graph <- as.undirected(graph, mode = "collapse")
	        # Detect communities
	        clusters <- fastgreedy.community(graph)
	        # Size of every community
	        cluster_sizes <- sizes(clusters)
	        # Order the communities by decreasing size
	        sorted_clusters <- clusters[order(cluster_sizes, decreasing = TRUE)]
	        # Community membership of every node
	        cluster_members <- membership(clusters)
	        # Nodes that sit alone in a community of size one
	        singleton_nodes <- names(cluster_members[cluster_members %in% which(sizes(clusters) == 1)])
	        # Create the workbook exported for Cytoscape
	        cytoscape_export <- createWorkbook()
	        # Create the edge sheet with its header row
	        addWorksheet(cytoscape_export, sheetName = "Edges")
	        writeData(cytoscape_export, sheet = "Edges", x = "Source", startCol = 1, startRow = 1)
	        writeData(cytoscape_export, sheet = "Edges", x = "Target", startCol = 2, startRow = 1)
	        # Fetch the edge list
	        edges <- get.edgelist(graph)
	        # Fill the edge sheet
	        if (nrow(edges) > 0) {
	            writeData(cytoscape_export, sheet = "Edges", x = V(graph)[edges[, 1]]$name, startCol = 1, startRow = 2)
	            writeData(cytoscape_export, sheet = "Edges", x = V(graph)[edges[, 2]]$name, startCol = 2, startRow = 2)
	        }
	        # Last row currently used in the edge sheet (header + edges)
	        last_edge_row <- nrow(edges) + 1
	        # Append the singleton nodes below the edges, in the Source column only
	        if (length(singleton_nodes) > 0) {
	            writeData(cytoscape_export, sheet = "Edges", x = singleton_nodes, startCol = 1, startRow = last_edge_row + 1)
	        }
	        # Save the Excel file
	        saveWorkbook(cytoscape_export, excel_path, overwrite = TRUE)
	        # Empty data frame that collects node / cluster assignments
	        export_clusters <- data.frame(protein = character(), cluster_name = character(), stringsAsFactors = FALSE)
	        # Walk through the size-sorted communities
	        cluster_index <- 1 # running cluster index
	        for (cluster_name in names(sorted_clusters)) {
	            proteins <- sorted_clusters[[cluster_name]]
	            # Record every protein together with its cluster label
	            for (protein in proteins) {
	                # Singleton nodes get the fixed label "singleton"
	                if (protein %in% singleton_nodes) {
	                    current_cluster_name <- "singleton"
	                } else {
	                    current_cluster_name <- as.character(cluster_index) # use the running index
	                }
	                export_clusters <- rbind(export_clusters, data.frame(protein = protein, cluster_name = current_cluster_name))
	            }
	            cluster_index <- cluster_index + 1 # advance the index
	        }
	        # Return the clustering result
	        return(list(cluster_data = export_clusters, graph = graph))
	    }
	    """
	)