Spaces:

lllchenlll
/

test

Runtime error

App Files Files Community

test / app.py

lllchenlll

Update app.py

054d7f8 over 2 years ago

raw

history blame contribute delete

3.94 kB

	import gradio as gr
	import numpy as np
	import openai

	from sentence_transformers import SentenceTransformer
	from langchain.prompts import PromptTemplate
	from collections import Counter


	# Lazily filled cache so the sentence encoder and the reference tag data are
	# loaded once per process instead of on every button click.
	_TAG_RESOURCES = {}


	def _load_tag_resources():
	    """Return the encoder and reference-tag data, loading them on first use."""
	    if not _TAG_RESOURCES:
	        # The tag list is Chinese text, so read it with an explicit encoding.
	        # NOTE(review): assumes tags.txt is UTF-8 encoded -- confirm.
	        with open('./tag_data/tags.txt', 'r', encoding='utf-8') as f:
	            # Keep every line (even blank ones): line j is looked up with the
	            # same index j as column j of the score matrix.
	            all_tags = [line.strip() for line in f]
	        # Arguments are evaluated before update() runs, so a failed load
	        # leaves the cache empty and the next call retries.
	        _TAG_RESOURCES.update(
	            encoder=SentenceTransformer("hfl/chinese-roberta-wwm-ext-large"),
	            tags_embed=np.load('./tag_data/tags_embed.npy'),
	            # Presumably the precomputed L2 norm of each row of tags_embed
	            # (it is used as the denominator of the similarity) -- TODO confirm.
	            # Flattened so both (m,) and (m, 1) layouts are accepted.
	            tags_dis=np.load('./tag_data/tags_dis.npy').reshape(-1),
	            all_tags=all_tags,
	        )
	    return _TAG_RESOURCES


	def _top_candidate_tags(answers, limit=10):
	    """Split the model answers into tags and return the `limit` most frequent."""
	    pieces = []
	    for answer in answers:
	        cleaned = answer.strip().replace("\n", "").replace("。", "")
	        # Treat the full-width comma as another tag separator.
	        cleaned = cleaned.replace("，", "、")
	        # Drop empty fragments (e.g. from a trailing separator) so that the
	        # empty string is never counted as a tag.
	        pieces += [p.strip() for p in cleaned.split('、') if p.strip()]
	    return [tag for tag, _ in Counter(pieces).most_common(limit)]


	def _match_reference_tags(candidate_tags, resources, threshold=0.8, limit=5):
	    """Return the reference tags most often similar to the candidate tags."""
	    embed = np.asarray(resources["encoder"].encode(candidate_tags))
	    norms = np.sqrt(np.sum(embed * embed, axis=1))
	    # Dot products normalised by both vector lengths.
	    similarity = np.dot(embed, resources["tags_embed"].T) / np.outer(
	        norms, resources["tags_dis"]
	    )
	    # np.nonzero reports hits in row-major order, i.e. the same order the
	    # nested candidate/reference loops would visit them.
	    _, cols = np.nonzero(similarity > threshold)
	    all_tags = resources["all_tags"]
	    hits = [all_tags[j] for j in cols]

	    print(hits)

	    return [tag for tag, _ in Counter(hits).most_common(limit)]


	def process(api, caption, category, asr, ocr):
	    """Generate interest tags for a video from its textual metadata.

	    Builds a one-shot Chinese prompt from the inputs, requests five
	    completions from gpt-3.5-turbo, keeps the ten most frequent generated
	    tags, and maps them onto the reference tags in ./tag_data whose
	    normalised similarity exceeds 0.8.

	    Args:
	        api: OpenAI API key.
	        caption: Video title.
	        category: Video category.
	        asr: Text inserted into the prompt's "asr" slot.
	        ocr: Text inserted into the prompt's "ocr" slot.

	    Returns:
	        Up to five reference tags joined by "、"; an empty string when the
	        model produced no usable tag; the string 'api error' when any step
	        raises (the traceback is printed so the cause is not lost).
	    """
	    import traceback

	    # NOTE(review): this sets the key on the openai module itself, so
	    # concurrent requests with different keys share it.
	    openai.api_key = api
	    preference = "兴趣标签"
	    example = (
	        "例如，给定一个视频，它的\"标题\"为\"长安系最便宜的轿车，4W起很多人都看不上它，但我知道车只是代步工具，又需要什么面子呢"
	        "！\"，\"类别\"为\"汽车\"，\"ocr\"为\"长安系最便宜的一款轿车\"，\"asr\"为\"我不否认现在的国产和合资还有一定的差距，"
	        "但确实是他们让我们5万开了MP V8万开上了轿车，10万开张了ICV15万开张了大七座。\"，\"{}\"生成机器人推断出合理的\"{}\"为\""
	        "长安轿车报价、最便宜的长安轿车、新款长安轿车\"。"
	    ).format(preference, preference)

	    prompt = PromptTemplate(
	        input_variables=["preference", "caption", "ocr", "asr", "category", "example"],
	        template="你是一个视频的\"{preference}\"生成机器人，根据输入的视频标题、类别、ocr、asr推理出合理的\"{preference}\"，以多个多"
	                 "于两字的标签形式进行表达，以顿号隔开。{example}那么，给定一个新的视频，它的\"标题\"为\"{caption}\"，\"类别\"为"
	                 "\"{category}\"，\"ocr\"为\"{ocr}\"，\"asr\"为\"{asr}\"，请推断出该视频的\"{preference}\"："
	    )

	    text = prompt.format(preference=preference, caption=caption, category=category, ocr=ocr, asr=asr, example=example)

	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-3.5-turbo",
	            messages=[{"role": "user", "content": text}],
	            temperature=1.5,
	            n=5
	        )

	        # Iterate over whatever was returned instead of assuming exactly 5.
	        answers = [choice.message["content"] for choice in completion.choices]
	        candidate_tags = _top_candidate_tags(answers)
	        if not candidate_tags:
	            return ""

	        matched = _match_reference_tags(candidate_tags, _load_tag_resources())
	        return "、".join(matched)

	    except Exception:
	        # Best-effort UI handler: keep the original user-facing message but
	        # log the real cause, and let KeyboardInterrupt/SystemExit propagate.
	        traceback.print_exc()
	        return 'api error'


	# Build the Gradio UI: five text inputs feed `process`, whose returned string
	# is shown in the output box.
	with gr.Blocks() as demo:
	    # Mask the key so the secret is not displayed in clear text on screen.
	    text_api = gr.Textbox(label='OpenAI API key', type='password')
	    text_caption = gr.Textbox(label='Caption')
	    text_category = gr.Textbox(label='Category')
	    text_asr = gr.Textbox(label='ASR')
	    text_ocr = gr.Textbox(label='OCR')

	    text_output = gr.Textbox(value='', label='Output')

	    btn = gr.Button(value='Submit')
	    # Input order must match the parameter order of process():
	    # (api, caption, category, asr, ocr).
	    btn.click(process, inputs=[text_api, text_caption, text_category, text_asr, text_ocr], outputs=[text_output])


	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)