PharC committed on
Commit
0c0b2b6
·
verified ·
1 Parent(s): 4bedae3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -37
app.py CHANGED
@@ -15,7 +15,7 @@ def get_ready_for_primers(gene_symbol, species="human"):
15
  Entrez.email = "your_email@example.com"
16
 
17
  try:
18
- # 1. 搜索基因
19
  search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism] AND alive[prop]"
20
  handle = Entrez.esearch(db="gene", term=search_term)
21
  record = Entrez.read(handle)
@@ -23,47 +23,28 @@ def get_ready_for_primers(gene_symbol, species="human"):
23
  return {"error": f"未找到基因: {gene_symbol}"}
24
  gene_id = record["IdList"][0]
25
 
26
- # 2. 获取关联序列
27
- # 注意不再使用 term,避免 NCBI 内部过滤器的不确定性
28
- link_handle = Entrez.elink(dbfrom="gene", db="nucleotide", id=gene_id)
 
 
 
 
 
 
29
  link_record = Entrez.read(link_handle)
30
 
31
- # 3. 筛选高质量的 RefSeq 序列 (NM_ 或 NR_)
32
- all_ids = []
33
- for link_set in link_record[0].get("LinkSetDb", []):
34
- if "nucleotide" in link_set["DbTo"]:
35
- all_ids.extend([link["Id"] for link in link_set["Link"]])
36
 
37
- if not all_ids:
38
- return {"error": "找不到关联序列"}
39
-
40
- # 使用 esummary 批量检查这些 ID 哪个是我们要的 RefSeq 转录本
41
- summary_handle = Entrez.esummary(db="nucleotide", id=",".join(all_ids))
42
- summaries = Entrez.read(summary_handle)
43
-
44
- nucl_id = None
45
- for summary in summaries:
46
- accession = summary.get('Caption', '')
47
- # 关键:只选择以 NM_ (编码) 或 NR_ (非编码) 开头的 RefSeq 序列
48
- if accession.startswith('NM_') or accession.startswith('NR_'):
49
- nucl_id = summary['Id']
50
- break
51
 
52
- # 如果没找到 NM/NR,退而求其次找 XM/XR (预测转录本)
53
- if not nucl_id:
54
- for summary in summaries:
55
- if summary.get('Caption', '').startswith(('XM_', 'XR_')):
56
- nucl_id = summary['Id']
57
- break
58
-
59
- if not nucl_id:
60
- return {"error": f"基因 {gene_symbol} 找不到标准的 RefSeq 转录本 (NM_/NR_)"}
61
-
62
- # 4. 下载并解析序列
63
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
64
  seq_record = SeqIO.read(handle, "genbank")
65
 
66
- # 5. 提取外显子
67
  junctions = []
68
  current_pos = 0
69
  for feature in seq_record.features:
@@ -72,7 +53,8 @@ def get_ready_for_primers(gene_symbol, species="human"):
72
  current_pos += (end - start)
73
  junctions.append(int(current_pos))
74
 
75
- if junctions: junctions.pop()
 
76
 
77
  return {
78
  "symbol": gene_symbol,
@@ -81,7 +63,8 @@ def get_ready_for_primers(gene_symbol, species="human"):
81
  "junctions": junctions
82
  }
83
  except Exception as e:
84
- return {"error": f"处理出错: {str(e)}"}
 
85
 
86
  def design_qpcr_primers(gene_data):
87
  """设计qPCR引物,包含失败重试机制"""
 
15
  Entrez.email = "your_email@example.com"
16
 
17
  try:
18
+ # 1. 搜索基因 ID (保持不变)
19
  search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism] AND alive[prop]"
20
  handle = Entrez.esearch(db="gene", term=search_term)
21
  record = Entrez.read(handle)
 
23
  return {"error": f"未找到基因: {gene_symbol}"}
24
  gene_id = record["IdList"][0]
25
 
26
+ # 2. 获取核苷酸 ID
27
+ # 核心修复点:使用 "refseq[filter] AND RNA[filter]"
28
+ # 这会同时命中 NM_ (mRNA) 和 NR_ (ncRNA),且排除掉染色体大片段
29
+ link_handle = Entrez.elink(
30
+ dbfrom="gene",
31
+ db="nucleotide",
32
+ id=gene_id,
33
+ term="refseq[filter] AND RNA[filter]"
34
+ )
35
  link_record = Entrez.read(link_handle)
36
 
37
+ # 检查是否有返回结果,防止 list index out of range
38
+ if not link_record[0]["LinkSetDb"]:
39
+ return {"error": f"基因 {gene_symbol} 找不到标准的 RefSeq 转录本"}
 
 
40
 
41
+ nucl_id = link_record[0]["LinkSetDb"][0]["Link"][0]["Id"]
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # 3. 下载并解析 (恢复你最开始的版本)
 
 
 
 
 
 
 
 
 
 
44
  handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
45
  seq_record = SeqIO.read(handle, "genbank")
46
 
47
+ # 4. 提取外显子
48
  junctions = []
49
  current_pos = 0
50
  for feature in seq_record.features:
 
53
  current_pos += (end - start)
54
  junctions.append(int(current_pos))
55
 
56
+ if junctions:
57
+ junctions.pop() # 最后一个点不是交界点
58
 
59
  return {
60
  "symbol": gene_symbol,
 
63
  "junctions": junctions
64
  }
65
  except Exception as e:
66
+ # 这里的报错能帮我们准确定位是哪一步
67
+ return {"error": f"获取 {gene_symbol} 失败: {str(e)}"}
68
 
69
  def design_qpcr_primers(gene_data):
70
  """设计qPCR引物,包含失败重试机制"""