Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 15 |
Entrez.email = "your_email@example.com"
|
| 16 |
|
| 17 |
try:
|
| 18 |
-
# 1. 搜索基因
|
| 19 |
search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism] AND alive[prop]"
|
| 20 |
handle = Entrez.esearch(db="gene", term=search_term)
|
| 21 |
record = Entrez.read(handle)
|
|
@@ -23,47 +23,28 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 23 |
return {"error": f"未找到基因: {gene_symbol}"}
|
| 24 |
gene_id = record["IdList"][0]
|
| 25 |
|
| 26 |
-
# 2. 获取
|
| 27 |
-
#
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
link_record = Entrez.read(link_handle)
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
if "nucleotide" in link_set["DbTo"]:
|
| 35 |
-
all_ids.extend([link["Id"] for link in link_set["Link"]])
|
| 36 |
|
| 37 |
-
|
| 38 |
-
return {"error": "找不到关联序列"}
|
| 39 |
-
|
| 40 |
-
# 使用 esummary 批量检查这些 ID 哪个是我们要的 RefSeq 转录本
|
| 41 |
-
summary_handle = Entrez.esummary(db="nucleotide", id=",".join(all_ids))
|
| 42 |
-
summaries = Entrez.read(summary_handle)
|
| 43 |
-
|
| 44 |
-
nucl_id = None
|
| 45 |
-
for summary in summaries:
|
| 46 |
-
accession = summary.get('Caption', '')
|
| 47 |
-
# 关键:只选择以 NM_ (编码) 或 NR_ (非编码) 开头的 RefSeq 序列
|
| 48 |
-
if accession.startswith('NM_') or accession.startswith('NR_'):
|
| 49 |
-
nucl_id = summary['Id']
|
| 50 |
-
break
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
if not nucl_id:
|
| 54 |
-
for summary in summaries:
|
| 55 |
-
if summary.get('Caption', '').startswith(('XM_', 'XR_')):
|
| 56 |
-
nucl_id = summary['Id']
|
| 57 |
-
break
|
| 58 |
-
|
| 59 |
-
if not nucl_id:
|
| 60 |
-
return {"error": f"基因 {gene_symbol} 找不到标准的 RefSeq 转录本 (NM_/NR_)"}
|
| 61 |
-
|
| 62 |
-
# 4. 下载并解析序列
|
| 63 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 64 |
seq_record = SeqIO.read(handle, "genbank")
|
| 65 |
|
| 66 |
-
#
|
| 67 |
junctions = []
|
| 68 |
current_pos = 0
|
| 69 |
for feature in seq_record.features:
|
|
@@ -72,7 +53,8 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 72 |
current_pos += (end - start)
|
| 73 |
junctions.append(int(current_pos))
|
| 74 |
|
| 75 |
-
if junctions:
|
|
|
|
| 76 |
|
| 77 |
return {
|
| 78 |
"symbol": gene_symbol,
|
|
@@ -81,7 +63,8 @@ def get_ready_for_primers(gene_symbol, species="human"):
|
|
| 81 |
"junctions": junctions
|
| 82 |
}
|
| 83 |
except Exception as e:
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
def design_qpcr_primers(gene_data):
|
| 87 |
"""设计qPCR引物,包含失败重试机制"""
|
|
|
|
| 15 |
Entrez.email = "your_email@example.com"
|
| 16 |
|
| 17 |
try:
|
| 18 |
+
# 1. 搜索基因 ID (保持不变)
|
| 19 |
search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism] AND alive[prop]"
|
| 20 |
handle = Entrez.esearch(db="gene", term=search_term)
|
| 21 |
record = Entrez.read(handle)
|
|
|
|
| 23 |
return {"error": f"未找到基因: {gene_symbol}"}
|
| 24 |
gene_id = record["IdList"][0]
|
| 25 |
|
| 26 |
+
# 2. 获取核苷酸 ID
|
| 27 |
+
# 核心修复点:使用 "refseq[filter] AND RNA[filter]"
|
| 28 |
+
# 这会同时命中 NM_ (mRNA) 和 NR_ (ncRNA),且排除掉染色体大片段
|
| 29 |
+
link_handle = Entrez.elink(
|
| 30 |
+
dbfrom="gene",
|
| 31 |
+
db="nucleotide",
|
| 32 |
+
id=gene_id,
|
| 33 |
+
term="refseq[filter] AND RNA[filter]"
|
| 34 |
+
)
|
| 35 |
link_record = Entrez.read(link_handle)
|
| 36 |
|
| 37 |
+
# 检查是否有返回结果,防止 list index out of range
|
| 38 |
+
if not link_record[0]["LinkSetDb"]:
|
| 39 |
+
return {"error": f"基因 {gene_symbol} 找不到标准的 RefSeq 转录本"}
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
nucl_id = link_record[0]["LinkSetDb"][0]["Link"][0]["Id"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
# 3. 下载并解析 (恢复你最开始的版本)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
|
| 45 |
seq_record = SeqIO.read(handle, "genbank")
|
| 46 |
|
| 47 |
+
# 4. 提取外显子
|
| 48 |
junctions = []
|
| 49 |
current_pos = 0
|
| 50 |
for feature in seq_record.features:
|
|
|
|
| 53 |
current_pos += (end - start)
|
| 54 |
junctions.append(int(current_pos))
|
| 55 |
|
| 56 |
+
if junctions:
|
| 57 |
+
junctions.pop() # 最后一个点不是交界点
|
| 58 |
|
| 59 |
return {
|
| 60 |
"symbol": gene_symbol,
|
|
|
|
| 63 |
"junctions": junctions
|
| 64 |
}
|
| 65 |
except Exception as e:
|
| 66 |
+
# 这里的报错能帮我们准确定位是哪一步出的错
|
| 67 |
+
return {"error": f"获取 {gene_symbol} 失败: {str(e)}"}
|
| 68 |
|
| 69 |
def design_qpcr_primers(gene_data):
|
| 70 |
"""设计qPCR引物,包含失败重试机制"""
|