Spaces:
Running
Running
import os | |
import gradio as gr | |
import numpy as np | |
import translators as ts | |
from PIL import Image | |
from gradio import Blocks, Button, Textbox, Row, Column, Dropdown, Examples, Audio, Markdown | |
from langchain import Cohere, LLMChain, PromptTemplate | |
from transformers import BlipProcessor, BlipForConditionalGeneration | |
from bark_speaker.txt2audio import gen_tts, AVAILABLE_PROMPTS | |
from comic_style.comic_style import inference | |
from sad_talker.src.gradio_demo import SadTalker | |
# BLIP image-captioning checkpoint, loaded once at import time. The processor
# and the model are both pulled from the same Hugging Face repo id.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
def translate_into_cn(source):
    """Translate English text to Chinese through the ``translators`` package.

    Args:
        source: English text to translate.

    Returns:
        Whatever ``ts.translate_text`` returns for an en -> zh request sent
        to the ``alibaba`` backend.
    """
    # Debug aid: show which translation backends the package knows about.
    print(ts.translators_pool)

    request = {
        "query_text": source,
        "translator": 'alibaba',
        "from_language": 'en',
        "to_language": 'zh',
    }
    return ts.translate_text(**request)
def predict_step(cohere_key, img, style):
    """Caption an image with BLIP, then have Cohere write a short styled essay.

    Args:
        cohere_key: Cohere API key forwarded to the LangChain ``Cohere`` LLM.
        img: The picture to describe: a PIL image, or any object that
            ``np.asarray`` can turn into an image array.
        style: Name of the writing style the essay should imitate; it is
            inserted verbatim into the prompt.

    Returns:
        A ``(caption, essay)`` tuple: the first decoded BLIP caption, and the
        LLM output after it has been passed through ``translate_into_cn``.

    Raises:
        gr.Error: If ``img`` is ``None`` (nothing was uploaded).
    """
    if img is None:
        # Clicking the button before uploading passes None; fail with a
        # readable message instead of an opaque array-conversion error.
        raise gr.Error("Please upload an image before generating an essay.")

    # Let Pillow work out the pixel layout and then normalise to RGB, rather
    # than forcing an 'RGB' interpretation onto whatever array arrives.
    if isinstance(img, Image.Image):
        i_image = img.convert("RGB")
    else:
        i_image = Image.fromarray(np.asarray(img)).convert("RGB")

    pixel_values = processor(
        images=i_image, return_tensors="pt", max_length=1024, verbose=True
    ).pixel_values
    output = model.generate(pixel_values)
    captions = [
        text.strip()
        for text in processor.batch_decode(output, skip_special_tokens=True)
    ]
    caption = captions[0]

    # NOTE: earlier prompt drafts (previously kept here as commented-out text)
    # also asked for Chinese-only output with no title, line breaks or extra
    # symbols, and for a trailing self-evaluation tag "[score:xxx]" (0-100).
    # Those requirements are not part of the prompt that is sent.
    question = (
        "Requirements: \nYou are a writing master. "
        f"According to the content: {caption}, write a 50 words essay in any "
        f'form, by the style of "{style}" as the final output content. '
        "\nfinal output content:"
    )
    print(f"question:{question}")

    # The template is a bare pass-through: the fully built question is the
    # only input variable.
    prompt = PromptTemplate(template="""{question}""", input_variables=["question"])
    llm = Cohere(cohere_api_key=cohere_key, model="command", temperature=0.3, verbose=True)
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    result = llm_chain.run(question)
    print(f"result:{result}")

    return caption, translate_into_cn(result)
# Talking-head generator whose `test` method backs the "Generate Talker"
# button below. Constructed with lazy_load=True (presumably defers loading
# its models until first use; confirm in sad_talker).
sad_talker = SadTalker(lazy_load=True)

# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened listing; confirm it matches the intended three-column layout.
with Blocks() as demo:
    with Row():
        # Column 1: API key, image upload / comic-style preview, essay style.
        with Column(scale=1):
            Markdown("[Cohere](https://dashboard.cohere.ai/)")
            cohere_key = gr.Text(label="Cohere Key:")
            Markdown("Scene 1:Img2Img(图生图)")
            with Row():
                image_upload = gr.Image(type="pil", label="Essay Image")
                comic_style_output = gr.Image(type="filepath", label="Comic Style")
            # Two sample images shipped next to this file.
            # NOTE(review): `fn` is passed without `outputs`; presumably it
            # only matters if example caching is enabled. Confirm.
            Examples(
                examples=[os.path.join(os.path.dirname(__file__), "example1.jpeg"),
                          os.path.join(os.path.dirname(__file__), "example2.jpg")],
                fn=inference,
                inputs=image_upload,
            )
            # Writing style inserted into the essay prompt by predict_step.
            dropdown = Dropdown(
                ["shakespeare", "luxun", "xuzhimo", "moyan", "laoshe"],
                value="luxun",
                label="Essay Style",
                info="选择你需要的文章的风格"
            )
            essay_btn = Button("Generate Essay", variant='primary')
        # Column 2: caption, generated essay, and text-to-speech controls.
        with Column(scale=1):
            Markdown("Scene 2:ReadImg(识图)")
            prediction_output = Textbox(label="Prediction")
            Markdown("Scene 3:GenEssay(风格小作文)")
            essay_output = Textbox(label="Essay", info="大约50字")
            Markdown("Scene 4:Txt2Aud(文字转语音)")
            audio_out = Audio(label="Generated Audio", type="filepath").style(height=20)
            # Voice presets come from bark_speaker's AVAILABLE_PROMPTS.
            audio_option = Dropdown(AVAILABLE_PROMPTS, value="Speaker 7 (zh)", label="Acoustic Prompt",
                                    elem_id="speaker_option")
            audio_btn = Button("Generate Audio", variant='primary')
        # Column 3: talking-head video built from the comic image and audio.
        with Column(scale=1):
            Markdown("Scene 5: Img&Aud2Talker(图片&语音转talker)")
            gen_video = gr.Video(label="Generated video", format="mp4")
            talker_btn = Button('Generate Talker', elem_id="sadtalker_generate", variant='primary')
    # Step 1: whenever the uploaded image changes, run `inference` on it and
    # show the result in the comic-style pane.
    image_upload.change(fn=inference, inputs=image_upload, outputs=comic_style_output)
    # Step 2: caption the upload and generate the styled essay; the handler is
    # registered under the API name "essay_generate".
    essay_btn.click(fn=predict_step, inputs=[cohere_key, image_upload, dropdown], outputs=[prediction_output, essay_output],
                    api_name="essay_generate")
    # Step 3: synthesise speech from the essay text with the chosen voice preset.
    audio_btn.click(fn=gen_tts, inputs=[essay_output, audio_option], outputs=audio_out)
    # Step 4: feed the comic-style image file and the generated audio file to
    # SadTalker and display the resulting video.
    talker_btn.click(fn=sad_talker.test, inputs=[comic_style_output, audio_out], outputs=[gen_video])
# Start the Gradio server with debug=True.
demo.launch(debug=True)