import os
from typing import Dict
import gradio as gr
import pandas as pd
from chat_task.chat import generate_chat
from doc_qa_task.doc_qa import generate_doc_qa
from examples import (
load_examples,
preprocess_docqa_examples,
preprocess_extraction_examples,
preprocess_qa_generator_examples,
)
from extract_data_task.extract import extract_slots
from plugin_task.api import api_plugin_chat
from qa_generator_task.generate_qa import generate_qa_pairs
from plugin_task.plugins import PLUGIN_JSON_SCHEMA
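# Resolve paths relative to this file so the button icons under ./images load
# regardless of the current working directory.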
abs_path = os.path.abspath(__file__)
current_dir = os.path.dirname(abs_path)
statistic_path = os.path.join(current_dir, "images")
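# Load example data up front (presumably backing the cached gr.Examples blocks below).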
load_examples()
def clear_session():
"""Clears the chat session."""
return "", None
def clear_plugin_session(session: Dict):
"""Clears the plugin session."""
session.clear()
return session, None, None
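# Show the custom-fallback textbox only when "自定义话术" (custom reply script)
# is selected in the fallback radio; hide it otherwise.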
def show_custom_fallback_textbox(x):
if x == "自定义话术":
return [gr.Row(visible=True), gr.Textbox()]
else:
return [gr.Row(visible=False), gr.Textbox()]
def validate_field_word_count(
input_text: str, description: str, max_word_count: int = 3000
):
    """
    Validate that input_text is non-empty and no longer than max_word_count characters.

    :param input_text: text to validate
    :param description: human-readable field name used in the error message
    :param max_word_count: maximum allowed number of characters
    :raises gr.Error: if the text is empty or exceeds the limit
    """
if len(input_text) == 0:
raise gr.Error(f"{description}不能为空")
if len(input_text) > max_word_count:
raise gr.Error(f"{description}字数不能超过{max_word_count}字")
def validate_chat(input_text: str):
    """Validate the chat input: non-empty and at most 500 characters."""
validate_field_word_count(input_text, "输入", 500)
def validate_doc_qa(
input_text: str,
doc_df: "pd.DataFrame",
fallback_ratio: str,
fallback_text_input: str,
):
    """
    Validate the document-QA inputs.

    :param input_text: the user question (at most 500 characters)
    :param doc_df: dataframe of document snippets (文档片段名称 / 文档片段内容)
    :param fallback_ratio: fallback strategy chosen in the radio
    :param fallback_text_input: custom fallback reply, only checked when
        "自定义话术" is selected
    """
    # If a custom fallback reply was chosen, validate its length as well.
if fallback_ratio == "自定义话术":
validate_field_word_count(fallback_text_input, "自定义话术", 100)
validate_field_word_count(input_text, "输入", 500)
page_content_full_text = (
" ".join(doc_df["文档片段名称"].tolist())
+ " "
+ " ".join(doc_df["文档片段内容"].tolist())
)
validate_field_word_count(page_content_full_text, "文档信息", 2500)
def validate_qa_pair_generator(input_text: str):
    """Validate the QA-generator input: non-empty and at most 3000 characters."""
return validate_field_word_count(input_text, "输入")
def validate_extraction(
input_text: str,
extraction_df: "pd.DataFrame",
):
    """Validate the extraction input text and the field-description table."""
extraction_full_text = (
" ".join(extraction_df["字段名称"].tolist())
+ " "
+ " ".join(extraction_df["字段描述"].tolist())
)
validate_field_word_count(input_text, "输入", 1500)
validate_field_word_count(extraction_full_text, "待抽取字段描述", 1500)
def validate_plugin(input_text: str):
    """Validate the plugin-chat input: non-empty and at most 500 characters."""
validate_field_word_count(input_text, "输入", 500)
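# Build the Gradio UI. Each tab validates its inputs first (via the validate_*
# helpers above) and only runs its task function when validation succeeds.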
with gr.Blocks(
title="Orion-14B",
theme="shivi/calm_seafoam@>=0.0.1,<1.0.0",
) as demo:
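
    # Helper shared by the chat-style tabs: append the user's message to the
    # history with an empty assistant slot for the reply to fill.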
def user(user_message, history):
return user_message, (history or []) + [[user_message, ""]]
gr.Markdown(
"""
<div style="overflow: hidden;color:#fff;display: flex;flex-direction: column;align-items: center; position: relative; width: 100%; height: 180px;background-size: cover; background-image: url(https://www.orionstar.com/res/orics/down/ow001_20240119_8369eca9013416109a2303bf4e329140.png);">
<img style="width: 130px;height: 60px;position: absolute;top:10px;left:10px" src="https://www.orionstar.com/res/orics/down/ow001_20240119_1236eba7ea0ac15931f4518d7f211d47.png"/>
<img style="min-width: 1416px; width: 1416px;height: 100px;margin-top: 30px;" src="https://www.orionstar.com/res/orics/down/ow001_20240119_10c5ca12a57116bda0e35916a28b247f.png"/>
<span style="margin-top: 10px;font-size: 12px;">请在<a href="https://github.com/OrionStarAI/Orion" style="color: white;">Github</a>点击Star支持我们,加入<a href="https://www.orionstar.com/res/orics/down/ow001_20240122_d87e5b4ea66a31493c38fcffe7bdb453.png" style="color: white;">官方微信交流群</a></span>
</div>
"""
)
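
    # Tab 1: 基础能力 (general chat), free-form conversation with Orion-14B-Chat.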
with gr.Tab("基础能力"):
chatbot = gr.Chatbot(
label="Orion-14B-Chat",
elem_classes="control-height",
show_copy_button=True,
min_width=1368,
height=416,
)
chat_text_input = gr.Textbox(label="输入", min_width=1368)
with gr.Row():
with gr.Column(scale=2):
gr.Examples(
[
"可以给我讲个笑话吗?",
"什么是伟大的诗歌?",
"你知道李白吗?",
"黑洞是如何工作的?",
"在表中插入一条数据,id为1,name为张三,age为18,请问SQL语句是什么?",
],
chat_text_input,
label="试试问",
)
with gr.Column(scale=1):
with gr.Row(variant="compact"):
clear_history = gr.Button(
"清除历史",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "clear.png"),
)
submit = gr.Button(
"发送",
variant="primary",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "send.svg"),
)
chat_text_input.submit(
fn=validate_chat, inputs=[chat_text_input], outputs=[], queue=False
).success(
user, [chat_text_input, chatbot], [chat_text_input, chatbot], queue=False
).success(
fn=generate_chat,
inputs=[chat_text_input, chatbot],
outputs=[chat_text_input, chatbot],
)
submit.click(
fn=validate_chat, inputs=[chat_text_input], outputs=[], queue=False
).success(
user, [chat_text_input, chatbot], [chat_text_input, chatbot], queue=False
).success(
fn=generate_chat,
inputs=[chat_text_input, chatbot],
outputs=[chat_text_input, chatbot],
api_name="chat",
)
clear_history.click(
fn=clear_session, inputs=[], outputs=[chat_text_input, chatbot], queue=False
)
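
    # Tab 2: 基于文档问答 (document QA). Answers are grounded in the document snippets
    # entered below, with optional citations and a configurable fallback reply
    # for questions the documents do not cover.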
with gr.Tab("基于文档问答"):
with gr.Row():
with gr.Column(scale=3, min_width=357, variant="panel"):
gr.Markdown(
'<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">配置项</span>'
)
citations_radio = gr.Radio(
["开启引用", "关闭引用"], label="引用", value="关闭引用"
)
fallback_radio = gr.Radio(
["使用大模型知识", "自定义话术"],
label="超纲问题回复",
value="自定义话术",
)
fallback_text_input = gr.Textbox(
label="自定义话术",
value="抱歉,我还在学习中,暂时无法回答您的问题。",
)
gr.Markdown(
'<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">文档信息</span>'
)
doc_df = gr.Dataframe(
headers=["文档片段内容", "文档片段名称"],
datatype=["str", "str"],
row_count=6,
col_count=(2, "fixed"),
label="",
interactive=True,
wrap=True,
elem_classes="control-height",
height=300,
)
with gr.Column(scale=2, min_width=430):
chatbot = gr.Chatbot(
label="适用场景:预期LLM通过自由知识回答",
elem_classes="control-height",
show_copy_button=True,
min_width=999,
height=419,
)
doc_qa_input = gr.Textbox(label="输入", min_width=999, max_lines=10)
with gr.Row():
with gr.Column(scale=2):
gr.Examples(
[
"哪些情况下不能超车?",
"参观须知",
"青岛啤酒酒精含量是多少?",
],
doc_qa_input,
label="试试问",
cache_examples=True,
fn=preprocess_docqa_examples,
outputs=[doc_df],
)
with gr.Column(scale=1):
with gr.Row(variant="compact"):
clear_history = gr.Button(
"清除历史",
                                min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "clear.png"),
)
submit = gr.Button(
"发送",
variant="primary",
                                min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "send.svg"),
)
doc_qa_input.submit(
fn=validate_doc_qa,
inputs=[
doc_qa_input,
doc_df,
fallback_radio,
fallback_text_input,
],
outputs=[],
queue=False,
).success(
user, [doc_qa_input, chatbot], [doc_qa_input, chatbot], queue=False
).success(
fn=generate_doc_qa,
inputs=[
doc_qa_input,
chatbot,
doc_df,
fallback_radio,
fallback_text_input,
citations_radio,
],
outputs=[doc_qa_input, chatbot],
scroll_to_output=True,
api_name="doc_qa",
)
submit.click(
fn=validate_doc_qa,
inputs=[
doc_qa_input,
doc_df,
fallback_radio,
fallback_text_input,
],
outputs=[],
queue=False,
).success(
user, [doc_qa_input, chatbot], [doc_qa_input, chatbot], queue=False
).success(
fn=generate_doc_qa,
inputs=[
doc_qa_input,
chatbot,
doc_df,
fallback_radio,
fallback_text_input,
citations_radio,
],
outputs=[doc_qa_input, chatbot],
scroll_to_output=True,
)
clear_history.click(
            fn=lambda: (None, None, None),
inputs=[],
outputs=[doc_df, doc_qa_input, chatbot],
queue=False,
)
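
    # Tab 3: 插件能力 (plugin / tool calling). The model may call the external APIs
    # declared in PLUGIN_JSON_SCHEMA; each plugin can be toggled on or off.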
with gr.Tab("插件能力"):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown(
'<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">配置项</span>'
)
radio_plugins = [
gr.Radio(
["开启", "关闭"],
label=plugin_json["name_for_human"],
value="开启",
)
for plugin_json in PLUGIN_JSON_SCHEMA
]
with gr.Column(scale=3):
session = gr.State(value=dict())
chatbot = gr.Chatbot(
label="适用场景:需要LLM调用API解决问题",
elem_classes="control-height",
show_copy_button=True,
)
plugin_text_input = gr.Textbox(label="输入")
with gr.Row():
with gr.Column(scale=2):
gr.Examples(
[
"北京天气怎么样?",
"查询物流信息",
"每日壁纸",
"bing今天的壁纸是什么",
"查询手机号码归属地",
],
plugin_text_input,
label="试试问",
)
with gr.Column(scale=1):
with gr.Row(variant="compact"):
clear_history = gr.Button(
"清除历史",
                                min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "clear.png"),
)
submit = gr.Button(
"发送",
variant="primary",
                                min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "send.svg"),
)
plugin_text_input.submit(
fn=validate_plugin,
inputs=[
plugin_text_input,
],
outputs=[],
queue=False,
).success(
user,
[plugin_text_input, chatbot],
[plugin_text_input, chatbot],
scroll_to_output=True,
).success(
fn=api_plugin_chat,
inputs=[session, plugin_text_input, chatbot, *radio_plugins],
outputs=[session, plugin_text_input, chatbot],
scroll_to_output=True,
)
submit.click(
fn=validate_plugin,
inputs=[
plugin_text_input,
],
outputs=[],
queue=False,
).success(
user,
[plugin_text_input, chatbot],
[plugin_text_input, chatbot],
scroll_to_output=True,
).success(
fn=api_plugin_chat,
inputs=[session, plugin_text_input, chatbot, *radio_plugins],
outputs=[session, plugin_text_input, chatbot],
api_name="plugin",
scroll_to_output=True,
)
clear_history.click(
fn=clear_plugin_session,
inputs=[session],
outputs=[session, plugin_text_input, chatbot],
queue=False,
)
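
    # Tab 4: 生成QA对 (QA-pair generation). Generates question/answer pairs as JSON
    # from a pasted passage of text.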
with gr.Tab("生成QA对"):
with gr.Row(equal_height=True):
qa_generator_output = gr.Code(
language="json",
show_label=False,
min_width=1368,
)
with gr.Row():
qa_generator_input = gr.Textbox(
label="输入",
show_label=True,
info="",
min_width=1368,
lines=5,
max_lines=10,
)
with gr.Row():
with gr.Column(scale=2):
gr.Examples(
[
"第一章 总 则 \n第...",
"金字塔,在建筑学上是...",
"山西老陈醋是以高粱、...",
"室内装饰构造虚拟仿真...",
"猎户星空(Orion...",
],
qa_generator_input,
label="试试问",
cache_examples=True,
fn=preprocess_qa_generator_examples,
outputs=[qa_generator_input],
)
with gr.Column(scale=1):
with gr.Row(variant="compact"):
clear = gr.Button(
"清除",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "clear.png"),
)
submit = gr.Button(
"发送",
variant="primary",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "send.svg"),
)
submit.click(
fn=validate_qa_pair_generator,
inputs=[qa_generator_input],
outputs=[],
).success(
fn=generate_qa_pairs,
inputs=[qa_generator_input],
outputs=[qa_generator_output, qa_generator_input],
scroll_to_output=True,
api_name="qa_generator",
)
clear.click(
            fn=lambda: ("", ""),
inputs=[],
outputs=[qa_generator_input, qa_generator_output],
queue=False,
)
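
    # Tab 5: 抽取数据 (data extraction). Extracts user-defined fields (字段名称 / 字段描述)
    # from free text into a result table.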
with gr.Tab("抽取数据"):
        extract_output_df = gr.Dataframe(
label="",
headers=["字段名称", "字段抽取结果"],
datatype=["str", "str"],
col_count=(2, "fixed"),
wrap=True,
elem_classes="control-height",
height=234,
row_count=5,
)
extract_input = gr.Textbox(label="输入", lines=5, min_width=1368, max_lines=10)
extraction_df = gr.Dataframe(
headers=["字段名称", "字段描述"],
datatype=["str", "str"],
row_count=3,
col_count=(2, "fixed"),
label="",
interactive=True,
wrap=True,
elem_classes="control-height",
height=180,
)
with gr.Row():
with gr.Column(scale=2):
gr.Examples(
["第一条合同当...", "发票编号: IN...", "发件人:John..."],
extract_input,
label="试试问",
cache_examples=True,
fn=preprocess_extraction_examples,
outputs=[extract_input, extraction_df],
)
with gr.Column(scale=1):
with gr.Row(variant="compact"):
clear = gr.Button(
"清除历史",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "clear.png"),
)
submit = gr.Button(
"发送",
variant="primary",
                        min_width=17,
size="sm",
scale=1,
icon=os.path.join(statistic_path, "send.svg"),
)
submit.click(
fn=validate_extraction,
inputs=[extract_input, extraction_df],
outputs=[],
).success(
fn=extract_slots,
inputs=[extract_input, extraction_df],
            outputs=[extract_output_df],
scroll_to_output=True,
api_name="extract",
)
clear.click(
            fn=lambda: ("", None, None),
inputs=[],
outputs=[
extract_input,
extraction_df,
                extract_output_df,
],
queue=False,
)
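
# Queue incoming requests (up to 40 waiting, 4 worker threads) and serve on all
# network interfaces; the auto-generated API docs are disabled.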
if __name__ == "__main__":
demo.queue(api_open=False, max_size=40).launch(
height=800,
share=False,
server_name="0.0.0.0",
show_api=False,
max_threads=4,
)