Update app.py
app.py CHANGED
@@ -1,4 +1,34 @@
 import streamlit as st
+from transformers import pipeline
+from PIL import Image

-
-
+# Initialize the visual question answering and text-to-speech pipelines.
+# Replace these with your own models if needed; the model argument can be omitted to use the defaults.
+vqa_pipeline = pipeline("visual-question-answering")
+tts_pipeline = pipeline("text-to-speech")
+
+def main():
+    # Set the title of the Streamlit app
+    st.title("Visual Question Answering & Text-to-Audio App")
+
+    # UI components for uploading an image and entering a question
+    image = st.file_uploader("Upload an image", type=["jpg", "png"])
+    question = st.text_input("Enter your question")
+
+    # Process once the user has uploaded an image and entered a question
+    if image and question:
+        # Run visual question answering on the uploaded image and question
+        vqa_result = vqa_pipeline({"image": Image.open(image), "question": question})
+        answer = vqa_result[0]['answer']  # top-ranked answer from the list of predictions
+        st.write(f"Answer: {answer}")  # Display the answer
+
+        # Convert the answer to speech
+        if st.button("Convert Answer to Audio"):
+            tts_result = tts_pipeline(answer)
+            audio_data = tts_result['audio']  # raw waveform as a NumPy array
+
+            # Create an audio player so the user can listen to the answer
+            st.audio(audio_data, sample_rate=tts_result['sampling_rate'])
+
+if __name__ == "__main__":
+    main()
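The comment above the pipeline calls notes that the model argument can be omitted to fall back on the task defaults. As a minimal sketch of the alternative (the checkpoint names below are illustrative assumptions, not part of this commit), the two pipelines can be pinned to explicit checkpoints so the Space keeps behaving the same even if the task defaults change:

from transformers import pipeline

# Illustrative checkpoints (assumed, not taken from this commit):
# a ViLT model fine-tuned for VQA and a small Bark model for speech synthesis.
vqa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")

Since the pipelines are created at module level, whichever checkpoints are used will be downloaded when the Space starts, before the Streamlit UI is served.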