# Provenance: app.py by jasonyu2015 -- commit 8d2bee8 ("Update app.py", verified).
import gradio as gr
import pandas as pd
import tempfile
import os
from pathlib import Path
from docx import Document
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import requests
import json
import websocket
import datetime
import hashlib
import hmac
import base64
from urllib.parse import urlparse, urlencode
from wsgiref.handlers import format_date_time
from time import mktime
import PyPDF2
import re
import io
# Spark cognitive large-model (LLM) configuration.
# Secure-WebSocket (wss) endpoint that run_spark_api connects to.
SPARKAI_URL = 'wss://spark-api.xf-yun.com/v4.0/chat'
# Each credential is read from the environment and falls back to the literal
# given here when the variable is unset.
# NOTE(review): the fallback literals look like real credentials committed to
# source control -- consider rotating them and requiring the env variables.
SPARKAI_APP_ID = os.getenv('SPARKAI_APP_ID', '35943c2b')
SPARKAI_API_SECRET = os.getenv('SPARKAI_API_SECRET', 'MmY4ZTBiYzZhNTJhOTMzMTY5MWZkNmFi')
SPARKAI_API_KEY = os.getenv('SPARKAI_API_KEY', 'd8c7001937dc33d1ec74aef5030ed816')
# Sent as the chat "domain" parameter of every request payload.
SPARKAI_DOMAIN = '4.0Ultra'
# UI translation table: language name -> {string key -> display text}.
# Both languages define the same set of keys. The resume generators look up
# the title, section headings and personal-info labels here.
TRANSLATIONS = {
    "中文": {
        # Page header
        "title": "AI职升姬",
        "subtitle": "输入您的简历信息,获取AI优化和生成的简历",
        # Personal information (keys match resume_data['personal_info'])
        "name": "姓名",
        "gender": "性别",
        "birthdate": "出生年月",
        "phone": "电话",
        "email": "邮件",
        "preferred_locations": "期望工作地",
        "political_status": "政治面貌",
        "hometown": "籍贯",
        # Education section
        "education": "教育经历",
        "school": "学校名称",
        "degree": "学历",
        "major": "专业",
        "time": "时间",
        "experience": "在校经历",
        "optimize_edu": "AI优化在校经历",
        "add_education": "添加教育经历",
        "delete_education": "删除最后一条教育经历",
        # Work / practical experience section
        "work_experience": "实践经验",
        "company": "公司/组织",
        "position": "职位",
        "work_content": "工作内容",
        "optimize_exp": "AI优化工作内容",
        "add_experience": "添加实践经验",
        "delete_experience": "删除最后一条实践经验",
        # Skills and awards section
        "skills": "技能和获奖情况",
        "skill_name": "技能/奖项名称",
        "description": "描述",
        "optimize_skill": "AI优化描述",
        "add_skill": "添加技能/奖项",
        "delete_skill": "删除最后一条技能/奖项",
        # Self evaluation, export and output
        "self_evaluation": "自我评价",
        "optimize_self": "AI优化自我评价",
        "file_format": "导出文件格式",
        "generate_resume": "生成简历",
        "preview": "简历预览",
        "download": "下载生成的简历",
        # Choice values
        "male": "男",
        "female": "女",
        "bachelor": "学士",
        "master": "硕士",
        "phd": "博士",
        "other": "其他",
        "crowd": "群众",
        "league_member": "团员",
        "party_member": "党员"
    },
    "English": {
        # Page header
        "title": "AI Resume Booster",
        "subtitle": "Enter your resume information to get AI-optimized and generated resume",
        # Personal information (keys match resume_data['personal_info'])
        "name": "Name",
        "gender": "Gender",
        "birthdate": "Date of Birth",
        "phone": "Phone",
        "email": "Email",
        "preferred_locations": "Preferred Work Locations",
        "political_status": "Political Status",
        "hometown": "Hometown",
        # Education section
        "education": "Education",
        "school": "School Name",
        "degree": "Degree",
        "major": "Major",
        "time": "Time",
        "experience": "School Experience",
        "optimize_edu": "AI Optimize School Experience",
        "add_education": "Add Education",
        "delete_education": "Delete Last Education Entry",
        # Work / practical experience section
        "work_experience": "Work Experience",
        "company": "Company/Organization",
        "position": "Position",
        "work_content": "Work Content",
        "optimize_exp": "AI Optimize Work Content",
        "add_experience": "Add Experience",
        "delete_experience": "Delete Last Experience Entry",
        # Skills and awards section
        "skills": "Skills and Awards",
        "skill_name": "Skill/Award Name",
        "description": "Description",
        "optimize_skill": "AI Optimize Description",
        "add_skill": "Add Skill/Award",
        "delete_skill": "Delete Last Skill/Award Entry",
        # Self evaluation, export and output
        "self_evaluation": "Self Evaluation",
        "optimize_self": "AI Optimize Self Evaluation",
        "file_format": "Export File Format",
        "generate_resume": "Generate Resume",
        "preview": "Resume Preview",
        "download": "Download Generated Resume",
        # Choice values
        "male": "Male",
        "female": "Female",
        "bachelor": "Bachelor",
        "master": "Master",
        "phd": "PhD",
        "other": "Other",
        "crowd": "Crowd",
        "league_member": "League Member",
        "party_member": "Party Member"
    }
}
class Ws_Param(object):
    """Holds Spark API credentials and builds the signed connection URL.

    Attributes set by the constructor: ``APPID``, ``APIKey``, ``APISecret``,
    ``Spark_url``, plus ``host`` and ``path`` derived from ``Spark_url``.
    """

    def __init__(self, APPID, APIKey, APISecret, Spark_url):
        """Store the credentials and split the endpoint into host and path."""
        endpoint = urlparse(Spark_url)
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.host = endpoint.netloc
        self.path = endpoint.path
        self.Spark_url = Spark_url

    def create_url(self):
        """Return ``Spark_url`` with ``authorization``, ``date`` and ``host`` query parameters.

        The signature is an HMAC-SHA256 (keyed with ``APISecret``) over the
        host, the current date and the request line, base64-encoded.
        """
        # HTTP-date string for the current time.
        timestamp = mktime(datetime.datetime.now().timetuple())
        date = format_date_time(timestamp)

        # The three lines that get signed, in this exact order.
        string_to_sign = "\n".join([
            "host: " + self.host,
            "date: " + date,
            "GET " + self.path + " HTTP/1.1",
        ])
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            string_to_sign.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        # The authorization value is itself base64-encoded before being sent.
        auth_plain = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'
        )
        authorization = base64.b64encode(auth_plain.encode('utf-8')).decode(encoding='utf-8')

        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": self.host,
        })
        return self.Spark_url + '?' + query
def gen_params(appid, query, domain):
    """Build the request payload for a single-turn Spark chat call.

    Args:
        appid: Value placed in ``header.app_id``.
        query: The user message; sent as the only entry of the message list.
        domain: Value placed in ``parameter.chat.domain``.

    Returns:
        A dict with ``header``, ``parameter`` and ``payload`` sections.
    """
    header = {"app_id": appid, "uid": "1234"}
    chat_options = {
        "domain": domain,
        "random_threshold": 0.5,
        "max_tokens": 2048,
        "auditing": "default",
    }
    user_message = {"role": "user", "content": query}
    return {
        "header": header,
        "parameter": {"chat": chat_options},
        "payload": {"message": {"text": [user_message]}},
    }
def run_spark_api(query):
    """Send ``query`` to the Spark chat API and return the concatenated reply.

    Opens a WebSocket to the signed URL, sends one request and reads frames
    until a frame reports ``status == 2`` or a non-zero ``header.code``.

    Args:
        query: The prompt text to send as the user message.

    Returns:
        The reply text received so far. When the service reports an error
        code the error is printed and whatever was collected (possibly an
        empty string) is returned.

    Raises:
        Whatever the websocket client or ``json.loads`` raise on connection
        or decoding failures; the socket is closed before propagating.
    """
    ws_param = Ws_Param(SPARKAI_APP_ID, SPARKAI_API_KEY, SPARKAI_API_SECRET, SPARKAI_URL)
    websocket.enableTrace(False)
    ws = websocket.create_connection(ws_param.create_url())

    chunks = []
    try:
        ws.send(json.dumps(gen_params(SPARKAI_APP_ID, query, SPARKAI_DOMAIN)))
        while True:
            message = json.loads(ws.recv())
            code = message['header']['code']
            if code != 0:
                print(f'请求错误: {code}, {message}')
                break
            choices = message["payload"]["choices"]
            chunks.append(choices["text"][0]["content"])
            # status == 2 marks the final frame of the reply.
            if choices["status"] == 2:
                break
    finally:
        # Close the socket even when send/recv/parsing raises.
        ws.close()
    return "".join(chunks)
def ai_optimize(content, field):
    """Ask the Spark model to polish ``content`` for the resume section ``field``.

    Args:
        content: The text to improve.
        field: Name of the section, interpolated into the prompt.

    Returns:
        The reply text returned by ``run_spark_api``.
    """
    instruction = f"作为一名人力资源专家,请优化以下{field}内容,使其更加专业和有吸引力:"
    prompt = instruction + "\n\n" + f"{content}"
    return run_spark_api(prompt)
def generate_pdf(resume_data, file_path, lang):
    """Render the resume as a PDF written to ``file_path``.

    Args:
        resume_data: Dict with ``personal_info`` (label key -> value),
            ``education``, ``experience`` and ``skills`` (each a list of row
            dicts, or a plain string) and ``self_evaluation`` (text).
        file_path: Destination path of the PDF.
        lang: Key into ``TRANSLATIONS`` selecting the label language.

    Raises:
        KeyError: If ``lang`` or a personal-info key is missing from
            ``TRANSLATIONS``.

    Notes:
        All text is drawn with a single CJK-capable font, so headings are
        distinguished by size rather than by a bold face. Long lines are not
        wrapped.
    """
    # Function-scope imports: only this function needs the font registry.
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    # The built-in Helvetica faces carry no CJK glyphs, so Chinese labels and
    # user text would not render. STSong-Light is one of ReportLab's
    # predefined CID fonts for Simplified Chinese and also covers Latin text.
    font_name = "STSong-Light"
    pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    labels = TRANSLATIONS[lang]
    _, page_height = letter
    top_y = page_height - 50
    bottom_y = 50

    pdf = canvas.Canvas(file_path, pagesize=letter)
    y = top_y

    def draw_line(text, x, size, gap):
        """Move down by ``gap`` points and draw one line, paging when full."""
        nonlocal y
        y -= gap
        if y < bottom_y:
            pdf.showPage()
            y = top_y
        # The font is set on every line because a new page resets it.
        pdf.setFont(font_name, size)
        pdf.drawString(x, y, text)

    def draw_paragraph(text, x):
        """Draw free text one source line per PDF line."""
        for line in str(text or "").splitlines():
            draw_line(line.strip(), x, 12, 20)

    draw_line(labels["title"], 50, 16, 0)
    y -= 10  # extra space between the title and the first detail line
    for key, value in resume_data['personal_info'].items():
        draw_line(f"{labels[key]}: {value}", 50, 12, 20)

    sections = [
        (labels["education"], resume_data['education']),
        (labels["work_experience"], resume_data['experience']),
        (labels["skills"], resume_data['skills']),
    ]
    for heading, data in sections:
        draw_line(heading, 50, 14, 40)
        if isinstance(data, list):
            for item in data:
                draw_line(" | ".join(str(v) for v in item.values()), 70, 12, 20)
        else:
            draw_paragraph(data, 70)

    draw_line(labels["self_evaluation"], 50, 14, 40)
    draw_paragraph(resume_data['self_evaluation'], 70)
    pdf.save()
def generate_docx(resume_data, file_path, lang):
    """Write the resume as a Word document to ``file_path``.

    Args:
        resume_data: Dict with ``personal_info``, ``education``,
            ``experience``, ``skills`` and ``self_evaluation`` entries.
        file_path: Destination path of the document.
        lang: Key into ``TRANSLATIONS`` selecting the label language.
    """
    labels = TRANSLATIONS[lang]
    document = Document()
    document.add_heading(labels["title"], 0)

    # One "label: value" paragraph per personal detail.
    for field, value in resume_data['personal_info'].items():
        document.add_paragraph(f"{labels[field]}: {value}")

    section_keys = (
        ("education", "education"),
        ("work_experience", "experience"),
        ("skills", "skills"),
    )
    # Resolve every heading and payload up front, before writing any section.
    resolved = [(labels[label_key], resume_data[data_key]) for label_key, data_key in section_keys]

    for heading, payload in resolved:
        document.add_heading(heading, level=1)
        if not isinstance(payload, list):
            document.add_paragraph(payload)
            continue
        # List payloads are rows; each row becomes one pipe-separated line.
        for row in payload:
            cells = [str(cell) for cell in row.values()]
            document.add_paragraph(" | ".join(cells))

    document.add_heading(labels["self_evaluation"], level=1)
    document.add_paragraph(resume_data['self_evaluation'])
    document.save(file_path)
def parse_resume(content, file_type):
    """Parse raw resume bytes according to ``file_type``.

    Args:
        content: The file content as bytes.
        file_type: ``'pdf'`` or ``'docx'``; anything else is decoded as
            UTF-8 text.

    Returns:
        The dict produced by the matching parser, or -- when any exception
        is raised -- an error message string instead.
    """
    try:
        if file_type == 'pdf':
            parsed = parse_pdf(content)
        elif file_type == 'docx':
            parsed = parse_docx(content)
        else:
            parsed = parse_text(content.decode('utf-8'))
    except Exception as e:
        # Callers detect failure by the str return type.
        return f"解析错误:{str(e)}"
    return parsed
def parse_pdf(content):
    """Extract the text of every page of a PDF and parse it as a resume.

    Args:
        content: The PDF file content as bytes.

    Returns:
        The dict returned by ``parse_text`` for the concatenated page text.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
    # ``or ""`` guards against a page that yields no text object at all;
    # join avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    return parse_text(text)
def parse_docx(content):
    """Read the paragraphs of a Word document and parse them as a resume.

    Args:
        content: The .docx file content as bytes.

    Returns:
        The dict returned by ``parse_text`` for the newline-joined paragraphs.
    """
    document = Document(io.BytesIO(content))
    paragraph_texts = (para.text for para in document.paragraphs)
    return parse_text("\n".join(paragraph_texts))
def parse_text(text):
    """Extract resume fields from plain text using label-based patterns.

    Args:
        text: The resume text.

    Returns:
        A dict with the keys ``name``, ``gender``, ``birthdate``, ``phone``,
        ``email``, ``education``, ``experience``, ``skills`` and
        ``self_evaluation``. A field that is not found maps to ``''``.
        Single-line fields and ``self_evaluation`` hold the stripped value
        after the label. The three section fields hold the whole matched
        span, including the section label.
    """
    # Single-line fields: matched WITHOUT re.DOTALL so ``.+`` stops at the
    # end of the line instead of swallowing the rest of the document. Label
    # alternatives are non-capturing so group(1) is always the value.
    line_patterns = {
        'name': r'姓名[::]\s*(.+)',
        'gender': r'性别[::]\s*(.+)',
        'birthdate': r'(?:出生年月|生日)[::]\s*(.+)',
        'phone': r'(?:电话|手机)[::]\s*(.+)',
        'email': r'(?:邮箱|邮件|E-mail)[::]\s*(.+)',
    }
    # Multi-line sections: lazily matched up to the next section label or
    # the end of the text.
    section_patterns = {
        'education': r'教育经历[::](.*?)(?=工作经验|技能|$)',
        'experience': r'(?:工作经验|实习经历)[::](.*?)(?=教育背景|技能|$)',
        'skills': r'(?:技能|专业技能)[::](.*?)(?=自我评价|$)',
    }
    # The self evaluation runs to the end of the text.
    self_evaluation_pattern = r'(?:自我评价|自我介绍)[::](.*)'

    result = {}
    for key, pattern in line_patterns.items():
        match = re.search(pattern, text)
        result[key] = match.group(1).strip() if match else ''
    for key, pattern in section_patterns.items():
        match = re.search(pattern, text, re.DOTALL)
        result[key] = match.group(0) if match else ''
    match = re.search(self_evaluation_pattern, text, re.DOTALL)
    result['self_evaluation'] = match.group(1).strip() if match else ''
    return result
def _blank_import_result(message):
    """Return the 13 import outputs with every field blank and ``message`` as status."""
    return [
        "", "", "", "", "",  # name, gender, birthdate, phone, email
        [],                  # preferred_locations
        "",                  # political_status
        "",                  # hometown
        pd.DataFrame(columns=["学校", "学历", "专业", "时间", "在校经历"]),
        pd.DataFrame(columns=["时间", "公司/组织", "职位", "工作内容"]),
        pd.DataFrame(columns=["时间", "技能/奖项名称", "描述"]),
        "",                  # self_evaluation
        message,             # import status
    ]


def import_resume(file_obj, pasted_text):
    """Parse an uploaded or pasted resume into values for the form fields.

    Args:
        file_obj: The uploaded file (an object with a ``name`` path
            attribute, or a plain path), or ``None``.
        pasted_text: Resume text pasted by the user; used only when no file
            was uploaded.

    Returns:
        A list of 13 values in the order the import button is wired to:
        name, gender, birthdate, phone, email, preferred locations,
        political status, hometown, education table, experience table,
        skill table, self evaluation, status message. The same shape is
        returned on failure, with blank fields and the error as status.
    """
    if file_obj is not None:
        # Read the upload from disk by path: this works both for file-like
        # wrappers exposing ``.name`` and for plain path strings, whereas
        # encoding ``str(file_obj)`` would parse the path instead of the file.
        file_path = getattr(file_obj, 'name', None) or str(file_obj)
        file_type = Path(file_path).suffix.lstrip('.').lower() or 'unknown'
        try:
            file_content = Path(file_path).read_bytes()
        except OSError as exc:
            return _blank_import_result(f"解析错误:{exc}")
    elif pasted_text:
        file_type = 'text'
        file_content = pasted_text.encode('utf-8')
    else:
        return _blank_import_result("请上传文件或粘贴文本")

    parsed_data = parse_resume(file_content, file_type)
    if isinstance(parsed_data, str):  # parse_resume signals failure with a message
        return _blank_import_result(parsed_data)

    date = r'\d{4}年\d{1,2}月'

    education_data = []
    if parsed_data.get('education'):
        edu_entries = re.findall(
            rf'(.+?大学).*?({date}).*?({date})', parsed_data['education'], re.DOTALL
        )
        for school, start_date, end_date in edu_entries:
            education_data.append({
                "学校": school,
                "学历": "",
                "专业": "",
                "时间": f"{start_date}-{end_date}",
                "在校经历": "",
            })

    experience_data = []
    if parsed_data.get('experience'):
        exp_entries = re.findall(
            rf'(.+?公司).*?({date}).*?({date})', parsed_data['experience'], re.DOTALL
        )
        for company, start_date, end_date in exp_entries:
            experience_data.append({
                "时间": f"{start_date}-{end_date}",
                "公司/组织": company,
                "职位": "",
                "工作内容": "",
            })

    skill_data = []
    if parsed_data.get('skills'):
        for skill in re.findall(r'([\w\s]+)', parsed_data['skills']):
            skill = skill.strip()
            # The pattern also matches whitespace-only runs; skip those.
            if skill:
                skill_data.append({"时间": "", "技能/奖项名称": skill, "描述": ""})

    # Passing ``columns`` keeps the headers even when no rows were found.
    education_df = pd.DataFrame(education_data, columns=["学校", "学历", "专业", "时间", "在校经历"])
    experience_df = pd.DataFrame(experience_data, columns=["时间", "公司/组织", "职位", "工作内容"])
    skill_df = pd.DataFrame(skill_data, columns=["时间", "技能/奖项名称", "描述"])

    return [
        parsed_data.get('name', ''),
        parsed_data.get('gender', ''),
        parsed_data.get('birthdate', ''),
        parsed_data.get('phone', ''),
        parsed_data.get('email', ''),
        [],  # preferred_locations
        "",  # political_status
        "",  # hometown
        education_df,
        experience_df,
        skill_df,
        parsed_data.get('self_evaluation', ''),
        "简历导入成功,请检查并补充缺失的信息。",
    ]
def gradio_interface(
    name, gender, birthdate, phone, email, preferred_locations, political_status, hometown,
    education_data, experience_data, skill_data, self_evaluation, file_format, lang
):
    """Assemble the resume, build a text preview and write the export file.

    Args:
        name, gender, birthdate, phone, email, political_status, hometown:
            Personal details.
        preferred_locations: Iterable of location names (``None`` is treated
            as empty).
        education_data, experience_data, skill_data: pandas DataFrames, one
            row per entry.
        self_evaluation: Free text.
        file_format: ``'pdf'`` selects the PDF generator; any other value
            produces a Word document. Also used as the temp-file suffix.
        lang: Key into ``TRANSLATIONS``; unknown values fall back to "中文".

    Returns:
        ``(preview, file_path)``: the preview text and the path of the
        generated temporary file (not deleted by this function).
    """
    # The language dropdown accepts custom values, so guard the lookup.
    if lang not in TRANSLATIONS:
        lang = "中文"
    labels = TRANSLATIONS[lang]

    resume_data = {
        'personal_info': {
            'name': name,
            'gender': gender,
            'birthdate': birthdate,
            'phone': phone,
            'email': email,
            'preferred_locations': ", ".join(preferred_locations or []),
            'political_status': political_status,
            'hometown': hometown,
        },
        'education': education_data.to_dict('records'),
        'experience': experience_data.to_dict('records'),
        'skills': skill_data.to_dict('records'),
        'self_evaluation': self_evaluation,
    }

    preview_lines = [
        f"{labels['name']}:",
        f"{resume_data['personal_info']}",
        f"{labels['education']}:",
        f"{resume_data['education']}",
        f"{labels['work_experience']}:",
        f"{resume_data['experience']}",
        f"{labels['skills']}:",
        f"{resume_data['skills']}",
        f"{labels['self_evaluation']}:",
        f"{resume_data['self_evaluation']}",
    ]
    preview = "\n" + "\n".join(preview_lines) + "\n"

    # Reserve a unique path, then close the handle BEFORE the generators
    # write to it: a still-open handle blocks re-opening on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_format}") as temp_file:
        file_path = temp_file.name

    if file_format == 'pdf':
        generate_pdf(resume_data, file_path, lang)
    else:
        generate_docx(resume_data, file_path, lang)
    return preview, file_path
def add_item(data, *args):
    """Append one row built from ``args`` to ``data`` and clear the inputs.

    Args:
        data: The DataFrame to extend; ``args`` must supply one value per
            column.
        *args: The cell values of the new row, in column order.

    Returns:
        A tuple of the extended DataFrame followed by one
        ``gr.update(value="")`` per argument.
    """
    row = pd.DataFrame([list(args)], columns=data.columns)
    extended = pd.concat([data, row], ignore_index=True)
    cleared_inputs = tuple(gr.update(value="") for _ in args)
    return (extended, *cleared_inputs)
def delete_item(data, index):
    """Remove the row at position ``index`` from ``data``.

    Args:
        data: The DataFrame to shrink.
        index: Zero-based row position. Out-of-range (including negative)
            values leave ``data`` untouched.

    Returns:
        A new DataFrame without that row and with a fresh 0..n-1 index, or
        ``data`` itself when ``index`` is out of range.
    """
    if 0 <= index < len(data):
        # Select by position: ``drop(index)`` removes by *label*, which fails
        # or removes the wrong rows when the frame has a non-default index.
        remaining = [pos for pos in range(len(data)) if pos != index]
        data = data.iloc[remaining].reset_index(drop=True)
    return data
def optimize_text(text, field):
    """Return an AI-polished version of ``text`` for the section ``field``.

    Args:
        text: The user's current text.
        field: Section name passed on to ``ai_optimize``.

    Returns:
        The optimized text. ``text`` is returned unchanged when it is empty
        or blank (no API call is made) and when the optimizer returns an
        empty result, so a failed request never wipes the user's input.
    """
    if not text or not text.strip():
        return text
    optimized = ai_optimize(text, field)
    return optimized or text
def update_language(lang):
    """Build the component updates that switch the UI to ``lang``.

    Returns exactly one update per component wired as an output of
    ``lang.change``, in that order (39 entries). Input fields get a new
    ``label`` so user-entered values are preserved; buttons and the two
    Markdown headers get a new ``value``. Components without a translation
    key receive a no-op update.

    Args:
        lang: Key into ``TRANSLATIONS``; unknown values fall back to "中文".

    Returns:
        A list of ``gr.update(...)`` objects.
    """
    labels = TRANSLATIONS.get(lang, TRANSLATIONS["中文"])

    def label(key):
        return gr.update(label=labels[key])

    def value(key):
        return gr.update(value=labels[key])

    return [
        gr.update(value=f"# {labels['title']}"),  # title (Markdown heading)
        value("subtitle"),             # subtitle
        label("name"),                 # name
        label("gender"),               # gender
        label("birthdate"),            # birthdate
        label("phone"),                # phone
        label("email"),                # email
        label("preferred_locations"),  # preferred_locations
        label("political_status"),     # political_status
        label("hometown"),             # hometown
        label("school"),               # school
        label("degree"),               # degree
        label("major"),                # major
        label("time"),                 # edu_time
        label("experience"),           # education_exp
        value("optimize_edu"),         # optimize_edu_btn
        value("add_education"),        # add_education
        value("delete_education"),     # delete_education
        label("time"),                 # exp_time
        label("company"),              # exp_company
        label("position"),             # exp_position
        label("work_content"),         # exp_content
        value("optimize_exp"),         # optimize_exp_btn
        value("add_experience"),       # add_experience
        value("delete_experience"),    # delete_experience
        label("time"),                 # skill_time
        label("skill_name"),           # skill_name
        label("description"),          # skill_description
        value("optimize_skill"),       # optimize_skill_btn
        value("add_skill"),            # add_skill
        value("delete_skill"),         # delete_skill
        label("self_evaluation"),      # self_evaluation
        value("optimize_self"),        # optimize_self_btn
        label("file_format"),          # file_format
        value("generate_resume"),      # submit_btn
        label("preview"),              # preview
        label("download"),             # resume_output
        gr.update(),                   # import_btn (no translation key)
        gr.update(),                   # import_status (no translation key)
    ]
# Gradio UI: builds the form, wires every button to its handler and launches
# the app.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source whose indentation was lost -- confirm against the intended
# layout.
with gr.Blocks() as iface:
    # Language selector and page header.
    lang = gr.Dropdown(choices=["中文", "English"], value="中文", label="Language/语言", allow_custom_value=True)
    title = gr.Markdown("# AI职升姬")
    subtitle = gr.Markdown("输入您的简历信息,获取AI优化和生成的简历")

    # Resume import: upload a file or paste text.
    with gr.Row():
        file_upload = gr.File(label="上传简历文件(PDF或DOCX)")
        text_input = gr.Textbox(label="或粘贴简历内容", lines=5)
    import_btn = gr.Button("导入简历")
    import_status = gr.Textbox(label="导入状态", interactive=False)

    # Personal information.
    with gr.Row():
        with gr.Column():
            name = gr.Textbox(label="姓名")
            gender = gr.Radio(["男", "女"], label="性别")
            birthdate = gr.Textbox(label="出生年月")
            phone = gr.Textbox(label="电话")
            email = gr.Textbox(label="邮件")
            preferred_locations = gr.CheckboxGroup(["北京", "上海", "广州", "深圳", "其他"], label="期望工作地")
            political_status = gr.Dropdown(["群众", "团员", "党员"], label="政治面貌", allow_custom_value=True)
            hometown = gr.Textbox(label="籍贯")

    # Education: table of entries plus the inputs used to add a new one.
    with gr.Accordion("教育经历"):
        education_list = gr.Dataframe(
            headers=["学校", "学历", "专业", "时间", "在校经历"],
            label="教育经历",
            interactive=True,
            col_count=(5, "fixed"),
            type="pandas"
        )
        with gr.Row():
            school = gr.Textbox(label="学校名称")
            degree = gr.Dropdown(["学士", "硕士", "博士", "其他"], label="学历", allow_custom_value=True)
            major = gr.Textbox(label="专业")
            edu_time = gr.Textbox(label="时间")
        education_exp = gr.TextArea(label="在校经历")
        optimize_edu_btn = gr.Button("AI优化在校经历")
        add_education = gr.Button("添加教育经历")
        delete_education = gr.Button("删除最后一条教育经历")

    # Practical experience: table of entries plus the inputs for a new one.
    with gr.Accordion("实践经验"):
        experience_list = gr.Dataframe(
            headers=["时间", "公司/组织", "职位", "工作内容"],
            label="实践经验",
            interactive=True,
            col_count=(4, "fixed"),
            type="pandas"
        )
        with gr.Row():
            exp_time = gr.Textbox(label="时间")
            exp_company = gr.Textbox(label="公司/组织")
            exp_position = gr.Textbox(label="职位")
        exp_content = gr.TextArea(label="工作内容")
        optimize_exp_btn = gr.Button("AI优化工作内容")
        add_experience = gr.Button("添加实践经验")
        delete_experience = gr.Button("删除最后一条实践经验")

    # Skills and awards: table of entries plus the inputs for a new one.
    with gr.Accordion("技能和获奖情况"):
        skill_list = gr.Dataframe(
            headers=["时间", "技能/奖项名称", "描述"],
            label="技能和获奖情况",
            interactive=True,
            col_count=(3, "fixed"),
            type="pandas"
        )
        with gr.Row():
            skill_time = gr.Textbox(label="时间")
            skill_name = gr.Textbox(label="技能/奖项名称")
        skill_description = gr.TextArea(label="描述")
        optimize_skill_btn = gr.Button("AI优化描述")
        add_skill = gr.Button("添加技能/奖项")
        delete_skill = gr.Button("删除最后一条技能/奖项")

    # Self evaluation and export options.
    self_evaluation = gr.TextArea(label="自我评价")
    optimize_self_btn = gr.Button("AI优化自我评价")
    file_format = gr.Radio(["pdf", "docx"], label="导出文件格式", value="pdf")
    with gr.Row():
        submit_btn = gr.Button("生成简历")
    with gr.Row():
        preview = gr.Textbox(label="简历预览", lines=10)
        resume_output = gr.File(label="下载生成的简历")

    # Language switching.
    # NOTE(review): update_language must return exactly one update per
    # component listed in ``outputs`` (39), in this order.
    lang.change(
        update_language,
        inputs=[lang],
        outputs=[
            title, subtitle, name, gender, birthdate, phone, email, preferred_locations,
            political_status, hometown, school, degree, major, edu_time, education_exp,
            optimize_edu_btn, add_education, delete_education, exp_time, exp_company,
            exp_position, exp_content, optimize_exp_btn, add_experience, delete_experience,
            skill_time, skill_name, skill_description, optimize_skill_btn, add_skill,
            delete_skill, self_evaluation, optimize_self_btn, file_format, submit_btn,
            preview, resume_output, import_btn, import_status
        ]
    )

    # Wire the import button to the import function (13 outputs).
    import_btn.click(
        import_resume,
        inputs=[file_upload, text_input],
        outputs=[name, gender, birthdate, phone, email, preferred_locations, political_status,
                 hometown, education_list, experience_list, skill_list, self_evaluation, import_status]
    )

    # "Add" buttons: append the input row to the table and clear the inputs.
    add_education.click(
        add_item,
        inputs=[education_list, school, degree, major, edu_time, education_exp],
        outputs=[education_list, school, degree, major, edu_time, education_exp]
    )
    add_experience.click(
        add_item,
        inputs=[experience_list, exp_time, exp_company, exp_position, exp_content],
        outputs=[experience_list, exp_time, exp_company, exp_position, exp_content]
    )
    add_skill.click(
        add_item,
        inputs=[skill_list, skill_time, skill_name, skill_description],
        outputs=[skill_list, skill_time, skill_name, skill_description]
    )

    # "Delete" buttons: drop the last row of the table, if there is one.
    delete_education.click(
        lambda df: delete_item(df, len(df) - 1) if len(df) > 0 else df,
        inputs=[education_list],
        outputs=[education_list]
    )
    delete_experience.click(
        lambda df: delete_item(df, len(df) - 1) if len(df) > 0 else df,
        inputs=[experience_list],
        outputs=[experience_list]
    )
    delete_skill.click(
        lambda df: delete_item(df, len(df) - 1) if len(df) > 0 else df,
        inputs=[skill_list],
        outputs=[skill_list]
    )

    # "AI optimize" buttons: the hidden Textbox supplies the constant section
    # name passed as the handler's second argument.
    optimize_edu_btn.click(
        optimize_text,
        inputs=[education_exp, gr.Textbox(value="在校经历", visible=False)],
        outputs=[education_exp]
    )
    optimize_exp_btn.click(
        optimize_text,
        inputs=[exp_content, gr.Textbox(value="工作内容", visible=False)],
        outputs=[exp_content]
    )
    optimize_skill_btn.click(
        optimize_text,
        inputs=[skill_description, gr.Textbox(value="技能描述", visible=False)],
        outputs=[skill_description]
    )
    optimize_self_btn.click(
        optimize_text,
        inputs=[self_evaluation, gr.Textbox(value="自我评价", visible=False)],
        outputs=[self_evaluation]
    )

    # Generate the resume: produces the preview text and the download file.
    submit_btn.click(
        gradio_interface,
        inputs=[
            name, gender, birthdate, phone, email, preferred_locations, political_status, hometown,
            education_list, experience_list, skill_list, self_evaluation, file_format, lang
        ],
        outputs=[preview, resume_output]
    )

# Launch the Gradio interface.
iface.launch()