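# NOTE: "Spaces: Sleeping" is status text from the hosting page that was
# captured together with this file; it is not part of the program.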
import tempfile
from pathlib import Path

import streamlit as st
from transformers import pipeline
@st.cache_resource
def _load_pipeline(task):
    """Build the default-model ``transformers`` pipeline for ``task``.

    Streamlit re-executes the script on every user interaction; wrapping the
    construction in ``st.cache_resource`` lets reruns reuse the already-built
    pipeline instead of loading the model again.

    Args:
        task: Pipeline task name, e.g. ``"visual-question-answering"``.

    Returns:
        The pipeline object returned by ``transformers.pipeline(task)``.
    """
    return pipeline(task)


# Visual-question-answering and text-to-speech pipelines.
# To use your own model, pass ``model=...`` to ``pipeline``; the argument can
# be omitted when the task's default model is acceptable.
vqa_pipeline = _load_pipeline("visual-question-answering")
tts_pipeline = _load_pipeline("text-to-speech")
def _answer_question(image, question):
    """Return the answer text for ``question`` about the uploaded ``image``.

    Args:
        image: The object returned by ``st.file_uploader`` (exposes ``name``
            and ``getvalue()``).
        question: The question text entered by the user.

    Returns:
        The answer string of the first candidate, or ``None`` when the
        pipeline produced no candidates.
    """
    # The pipeline is given a local file path rather than the upload object
    # itself, so the upload's bytes are first written to a temporary file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Keep the original extension so the image format stays recognisable.
        image_path = Path(tmp_dir) / f"upload{Path(image.name).suffix}"
        image_path.write_bytes(image.getvalue())
        vqa_result = vqa_pipeline({"image": str(image_path), "question": question})

    # Accept either a single result dict or a list of candidate dicts.
    if isinstance(vqa_result, dict):
        return vqa_result.get("answer")
    if not vqa_result:
        return None
    # NOTE(review): the first candidate is assumed to be the best-ranked one;
    # confirm if a custom model with different ordering is plugged in.
    return vqa_result[0].get("answer")


def main():
    """Render the app: answer a question about an uploaded image, then
    optionally convert that answer to audio.

    Reads the module-level ``vqa_pipeline`` and ``tts_pipeline``. Returns
    ``None``; all output goes to the Streamlit page.
    """
    # Page title.
    st.title("Visual Question Answering & Text-to-Audio App")

    # Input widgets: the image upload and the question text.
    image = st.file_uploader("Upload an image", type=["jpg", "png"])
    question = st.text_input("Enter your question")

    # Nothing to do until both an image and a non-blank question are present.
    if image is None or not question.strip():
        return

    # Run visual question answering on the uploaded image and show the answer.
    answer = _answer_question(image, question)
    if not answer:
        st.warning("No answer could be determined for this image and question.")
        return
    st.write(f"Answer: {answer}")

    # Convert the answer to speech on request and show an audio player.
    if st.button("Convert Answer to Audio"):
        tts_result = tts_pipeline(answer)
        # The TTS result carries a raw waveform, so the player needs the
        # sampling rate that goes with it rather than a container format.
        st.audio(tts_result["audio"], sample_rate=tts_result["sampling_rate"])
# Entry point: build the UI when this file is executed as a script.
if __name__ == "__main__":
    main()