# FasterPrimer / app.py
# PharC's picture
# Update app.py
# b16b331 verified
from flask import Flask, render_template, request, jsonify, send_file
from Bio import Entrez, SeqIO
import primer3
import ssl
import pandas as pd
import io
import json
from datetime import datetime
import time
# Flask application object; the @app.route decorators in this module register
# their view functions on it.
app = Flask(__name__)
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one, i.e. code that relies on the default context will no
# longer validate server certificates, process-wide. Presumably added to work
# around certificate errors when contacting NCBI -- confirm, and prefer fixing
# the CA bundle over disabling verification.
ssl._create_default_https_context = ssl._create_unverified_context
def _parse_and_close(handle, parser):
    """Run *parser* on an Entrez handle and always close the handle afterwards."""
    try:
        return parser(handle)
    finally:
        handle.close()


def get_ready_for_primers(gene_symbol, species="human"):
    """Fetch a gene's sequence record and exon-junction positions from NCBI.

    Steps: search the ``gene`` database for the symbol, follow the
    gene -> nucleotide link (restricted to RefSeq mRNA) to the first linked
    record, download that record in GenBank format and derive junction
    positions from its ``exon`` features.

    Args:
        gene_symbol: Gene symbol, matched against the ``[Gene Name]`` field.
        species: Organism name, matched against the ``[Organism]`` field.

    Returns:
        On success, a dict with ``symbol``, ``nm_id`` (id of the downloaded
        record), ``sequence`` (str) and ``junctions`` (list of int: running
        sum of exon lengths, with the final boundary dropped).
        On any failure, a dict with a single ``error`` key. Network and
        parsing errors from every Entrez call are reported this way rather
        than raised, so one bad gene cannot abort a batch request.
    """
    Entrez.email = "your_email@example.com"

    try:
        # 1. Look up the gene and take the first NCBI Gene ID.
        search_term = f"{gene_symbol}[Gene Name] AND {species}[Organism]"
        record = _parse_and_close(
            Entrez.esearch(db="gene", term=search_term), Entrez.read
        )
        if not record["IdList"]:
            return {"error": "未找到该基因"}
        gene_id = record["IdList"][0]

        # 2. Resolve the nucleotide records linked to that gene.
        link_record = _parse_and_close(
            Entrez.elink(
                dbfrom="gene",
                db="nucleotide",
                id=gene_id,
                term="srcdb_refseq[prop] AND mRNA[filter]",
            ),
            Entrez.read,
        )
        link_sets = link_record[0]["LinkSetDb"] if link_record else []
        if not link_sets or not link_sets[0]["Link"]:
            # Report the real cause instead of an opaque IndexError message.
            return {"error": "获取基因信息失败: 未找到关联的 RefSeq mRNA 序列"}
        nucl_id = link_sets[0]["Link"][0]["Id"]

        # 3. Download the full GenBank record for the first linked sequence.
        seq_record = _parse_and_close(
            Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text"),
            lambda handle: SeqIO.read(handle, "genbank"),
        )

        # Junction positions are the running total of exon lengths.
        junctions = []
        current_pos = 0
        for feature in seq_record.features:
            if feature.type != "exon":
                continue
            current_pos += feature.location.end - feature.location.start
            junctions.append(int(current_pos))
        if junctions:
            # The last value is the end of the final exon, not a junction.
            junctions.pop()

        return {
            "symbol": gene_symbol,
            "nm_id": seq_record.id,
            "sequence": str(seq_record.seq),
            "junctions": junctions,
        }
    except Exception as e:
        return {"error": f"获取基因信息失败: {str(e)}"}
def _extract_primer_pairs(results, max_pairs, junction_info):
    """Collect up to *max_pairs* primer pairs from a primer3 result mapping."""
    primer_pairs = []
    for i in range(max_pairs):
        try:
            primer_pairs.append({
                "id": i + 1,
                "forward": results[f'PRIMER_LEFT_{i}_SEQUENCE'],
                "reverse": results[f'PRIMER_RIGHT_{i}_SEQUENCE'],
                "f_tm": f"{results[f'PRIMER_LEFT_{i}_TM']:.2f}",
                "r_tm": f"{results[f'PRIMER_RIGHT_{i}_TM']:.2f}",
                "product_size": results[f'PRIMER_PAIR_{i}_PRODUCT_SIZE'],
                "junction_info": junction_info,
            })
        except KeyError:
            # Fewer pairs were returned than requested.
            break
    return primer_pairs


def design_qpcr_primers(gene_data):
    """Design qPCR primer pairs for a gene, preferring junction-spanning ones.

    A first attempt passes every entry of ``gene_data['junctions']`` as
    ``SEQUENCE_OVERLAP_JUNCTION_LIST``. If that attempt raises *or yields no
    primer pair*, the design is repeated without the junction constraint.
    (Previously the fallback only ran on an exception, so a constrained run
    that merely returned zero pairs was reported as a failure.)

    Args:
        gene_data: Dict as produced by ``get_ready_for_primers``: either
            ``{"error": ...}`` or a dict with ``nm_id``, ``sequence`` and
            optionally ``junctions``.

    Returns:
        ``gene_data`` itself when it carries an ``error`` key; otherwise
        ``{"primers": [...], "gene_info": gene_data}`` on success, or a dict
        with an ``error`` key when no pair is found or the design fails.
    """
    if "error" in gene_data:
        return gene_data

    # Sequence-specific arguments.
    seq_args = {
        'SEQUENCE_ID': gene_data['nm_id'],
        'SEQUENCE_TEMPLATE': gene_data['sequence'],
    }
    # Global design constraints.
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_TM_MAX_DIFF': 2.0,  # slightly relaxed Tm difference limit
        'PRIMER_MIN_GC': 30.0,  # relaxed lower GC bound
        'PRIMER_MAX_GC': 70.0,  # relaxed upper GC bound
        'PRIMER_PRODUCT_SIZE_RANGE': [80, 250],  # qPCR rarely needs long products
        'PRIMER_NUM_RETURN': 5,
    }
    max_pairs = global_args['PRIMER_NUM_RETURN']

    try:
        primer_pairs = []
        junctions = gene_data.get('junctions', [])

        # Attempt 1: require a primer to overlap an exon junction.
        if junctions:
            junction_args = {**seq_args, 'SEQUENCE_OVERLAP_JUNCTION_LIST': junctions}
            try:
                results = primer3.bindings.design_primers(junction_args, global_args)
            except Exception:
                # Deliberate best-effort: fall back to the unconstrained design.
                results = {}
            primer_pairs = _extract_primer_pairs(results, max_pairs, "是")

        # Attempt 2: plain design when no junctions exist or attempt 1 found nothing.
        if not primer_pairs:
            results = primer3.bindings.design_primers(seq_args, global_args)
            primer_pairs = _extract_primer_pairs(results, max_pairs, "否(普通设计)")

        if not primer_pairs:
            return {"error": "无法找到符合条件的引物,请尝试放宽筛选标准"}
        return {"primers": primer_pairs, "gene_info": gene_data}
    except Exception as e:
        return {"error": f"引物设计深度失败: {str(e)}"}
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/design_primers', methods=['POST'])
def design_primers_api():
    """Design primers for a single gene supplied in a JSON request body.

    Expected body keys: ``gene_symbol`` (required) and ``species``
    (optional, falls back to ``'human'``). A missing, malformed or
    non-object body is treated like an empty one, so the client receives the
    regular "gene name required" error payload instead of an unhandled
    exception.

    Returns:
        A JSON response holding either the design result or an ``error`` key.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    raw_symbol = payload.get('gene_symbol')
    # Tolerate non-string symbols (e.g. numbers) instead of failing on .strip().
    gene_symbol = '' if raw_symbol is None else str(raw_symbol).strip()
    species = payload.get('species') or 'human'

    if not gene_symbol:
        return jsonify({"error": "请输入基因名称"})

    # Fetch the gene record, then design primers against it.
    gene_data = get_ready_for_primers(gene_symbol, species)
    return jsonify(design_qpcr_primers(gene_data))
@app.route('/batch_design_primers', methods=['POST'])
def batch_design_primers_api():
    """Design primers for every gene in a JSON list.

    Expected body keys: ``gene_list`` (list of gene symbols) and ``species``
    (optional, falls back to ``'human'``). A missing or malformed body, or a
    ``gene_list`` that is not a non-empty list, yields the "gene list
    required" error payload. Blank and non-string entries are skipped.

    Returns:
        A JSON response ``{"results": [...]}`` with one entry per processed
        gene: ``{"gene", "status": "success", "data"}`` or
        ``{"gene", "status": "failed", "error"}``.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    gene_list = payload.get('gene_list')
    species = payload.get('species') or 'human'

    # A bare string would otherwise be iterated character by character.
    if not isinstance(gene_list, list) or not gene_list:
        return jsonify({"error": "请输入基因列表"})

    symbols = [
        item.strip()
        for item in gene_list
        if isinstance(item, str) and item.strip()
    ]

    results = []
    for position, gene_symbol in enumerate(symbols):
        if position:
            # Pause between genes only; there is nothing to wait for before
            # the first lookup.
            time.sleep(1)

        gene_data = get_ready_for_primers(gene_symbol, species)
        result = design_qpcr_primers(gene_data)

        if "error" in result:
            results.append({
                "gene": gene_symbol,
                "status": "failed",
                "error": result["error"],
            })
        else:
            results.append({
                "gene": gene_symbol,
                "status": "success",
                "data": result,
            })

    return jsonify({"results": results})
@app.route('/export_primers', methods=['POST'])
def export_primers():
    """Export previously computed results in the requested file format.

    Expected body keys: ``format`` (``'excel'``, ``'csv'`` or ``'json'``;
    defaults to ``'excel'``) and ``data`` (the results list; defaults to
    ``[]``). A missing, malformed or non-object body is treated as empty
    instead of raising.

    Returns:
        The file response produced by the matching exporter, or a JSON
        ``error`` payload when the format is not supported.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    export_format = payload.get('format', 'excel')
    results_data = payload.get('data', [])

    exporters = {
        'excel': export_to_excel,
        'csv': export_to_csv,
        'json': export_to_json,
    }
    # Guard the lookup: a non-string format (e.g. a list) is unhashable.
    exporter = exporters.get(export_format) if isinstance(export_format, str) else None
    if exporter is None:
        return jsonify({"error": "不支持的导出格式"})
    return exporter(results_data)
def export_to_excel(results_data):
    """Return the results as a downloadable ``.xlsx`` attachment.

    Successful entries contribute one row per primer pair; every other entry
    contributes a single placeholder row carrying its error message.

    Args:
        results_data: List of per-gene result dicts as returned by the batch
            design endpoint.

    Returns:
        The ``send_file`` response wrapping the in-memory workbook.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    missing = 'N/A'

    table = []
    for entry in results_data:
        if entry.get('status') != 'success':
            # Failed gene: one row of placeholders plus the error text.
            table.append({
                '基因名称': entry['gene'],
                'RefSeq ID': missing,
                '引物对编号': missing,
                '正向引物序列': missing,
                '反向引物序列': missing,
                '正向引物Tm(°C)': missing,
                '反向引物Tm(°C)': missing,
                '产物长度(bp)': missing,
                '外显子交界点': missing,
                '设计时间': datetime.now().strftime(time_format),
                '错误信息': entry.get('error', '未知错误'),
            })
            continue

        info = entry['data']['gene_info']
        pairs = entry['data']['primers']
        for pair in pairs:
            table.append({
                '基因名称': info['symbol'],
                'RefSeq ID': info['nm_id'],
                '引物对编号': pair['id'],
                '正向引物序列': pair['forward'],
                '反向引物序列': pair['reverse'],
                '正向引物Tm(°C)': pair['f_tm'],
                '反向引物Tm(°C)': pair['r_tm'],
                '产物长度(bp)': pair['product_size'],
                '外显子交界点': ', '.join(str(pos) for pos in info['junctions']),
                '设计时间': datetime.now().strftime(time_format),
            })

    frame = pd.DataFrame(table)

    # Write the workbook into memory, then rewind so it can be streamed.
    workbook = io.BytesIO()
    with pd.ExcelWriter(workbook, engine='openpyxl') as writer:
        frame.to_excel(writer, sheet_name='引物设计结果', index=False)
    workbook.seek(0)

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return send_file(
        workbook,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        as_attachment=True,
        download_name=f"qPCR_primers_{stamp}.xlsx",
    )
def export_to_csv(results_data):
    """Return the successful results as a downloadable CSV attachment.

    Only entries whose ``status`` is ``'success'`` are exported, one row per
    primer pair. The payload is encoded as UTF-8 with a BOM.

    Args:
        results_data: List of per-gene result dicts as returned by the batch
            design endpoint.

    Returns:
        The ``send_file`` response wrapping the encoded CSV bytes.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    table = []
    for entry in results_data:
        if entry.get('status') != 'success':
            continue
        info = entry['data']['gene_info']
        pairs = entry['data']['primers']
        for pair in pairs:
            table.append({
                '基因名称': info['symbol'],
                'RefSeq ID': info['nm_id'],
                '引物对编号': pair['id'],
                '正向引物序列': pair['forward'],
                '反向引物序列': pair['reverse'],
                '正向引物Tm(°C)': pair['f_tm'],
                '反向引物Tm(°C)': pair['r_tm'],
                '产物长度(bp)': pair['product_size'],
                '外显子交界点': ', '.join(str(pos) for pos in info['junctions']),
                '设计时间': datetime.now().strftime(time_format),
            })

    # Render to text first; the bytes sent to the client are encoded below.
    text_buffer = io.StringIO()
    pd.DataFrame(table).to_csv(text_buffer, index=False, encoding='utf-8-sig')

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_bytes = text_buffer.getvalue().encode('utf-8-sig')
    return send_file(
        io.BytesIO(csv_bytes),
        mimetype='text/csv',
        as_attachment=True,
        download_name=f"qPCR_primers_{stamp}.csv",
    )
def export_to_json(results_data):
    """Return the results as a downloadable JSON attachment.

    The document wraps ``results_data`` unchanged together with the export
    time and the number of entries.

    Args:
        results_data: List of per-gene result dicts to embed in the document.

    Returns:
        The ``send_file`` response wrapping the UTF-8 encoded JSON bytes.
    """
    document = {}
    document["export_time"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    document["total_genes"] = len(results_data)
    document["results"] = results_data

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # Keep non-ASCII text readable in the file rather than \u-escaped.
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    return send_file(
        io.BytesIO(serialized.encode('utf-8')),
        mimetype='application/json',
        as_attachment=True,
        download_name=f"qPCR_primers_{stamp}.json",
    )
if __name__ == '__main__':
    import os

    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Keep it off unless
    # explicitly requested (FLASK_DEBUG=1/true/yes) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)