|
import streamlit as st |
|
from gpt4v import GPT4Vision |
|
from gpttts import GPTTTS |
|
from openai import OpenAI |
|
import os |
|
|
|
|
|
# Shared service objects, built once when the script is loaded and used by
# main(): `image` produces the text description, `talk` produces the speech.
image = GPT4Vision()

# Indexing os.environ raises KeyError when OPENAI_API_KEY is not set.
_openai_api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=_openai_api_key)
talk = GPTTTS(client)

# NOTE(review): the page_icon text looks like a mis-decoded emoji -- confirm
# the intended icon against the original file encoding.
st.set_page_config(page_title="Miracle", page_icon="π€", layout="wide")
|
|
|
|
|
# Languages offered in the selector, in display order.
_LANGUAGES = (
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech",
    "Danish", "Dutch", "English", "Estonian", "Finnish", "French",
    "Galician", "German", "Greek", "Hebrew", "Hindi", "Hungarian",
    "Icelandic", "Indonesian", "Italian", "Japanese", "Kannada", "Kazakh",
    "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
    "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese",
    "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish",
    "Swahili", "Swedish", "Tagalog", "Tamil", "Thai", "Turkish",
    "Ukrainian", "Urdu", "Vietnamese", "Welsh",
)

# Pre-selected language; this is the entry the previous hard-coded index 14
# pointed at, so the default shown to the user is unchanged.
_DEFAULT_LANGUAGE = "Estonian"


def main():
    """Render the page: pick a language, take an image, describe it, speak it.

    The left column holds the language selector and a file uploader, the
    right column holds the camera input, and the middle column shows the
    chosen image, its generated description and an audio player for the
    spoken description. An uploaded file takes precedence over a camera
    picture. Nothing is generated until an image is available.

    Returns:
        None. All output is rendered through Streamlit.
    """
    left_col, middle_col, right_col = st.columns(spec=[1, 1, 1], gap="small")

    with left_col:
        language = st.selectbox(
            "Select language",
            options=list(_LANGUAGES),
            index=_LANGUAGES.index(_DEFAULT_LANGUAGE),
        )
        # NOTE(review): the label mentions video, but the result is passed to
        # st.image below -- confirm whether video input is really supported.
        uploaded_file = st.file_uploader(
            "Upload image or video", label_visibility="hidden"
        )

    with right_col:
        camera_image = st.camera_input(
            "Or take a picture", label_visibility="hidden"
        )

    # An uploaded file wins over a camera capture when both are present.
    file_to_process = uploaded_file if uploaded_file is not None else camera_image
    if file_to_process is None:
        return

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to render inside the middle column -- confirm against the app.
    with middle_col:
        st.image(file_to_process, caption="Image")

        prompt = (
            "Describe this image in detail, taken with blind persons mobile "
            f"phone camera. Respond in language: {language}"
        )
        with st.spinner("Generating explanation..."):
            explanation = image.describe(
                image_file=file_to_process, user_message=prompt
            )

        # Do not show or synthesize an empty description (presumably
        # describe() returns text; verify against GPT4Vision).
        if not explanation:
            st.warning("No description was generated for this image.")
            return
        st.success(explanation)

        with st.spinner("Generating audio..."):
            audio_file_path = talk.generate_speech(
                text=explanation, model="tts-1-hd", voice="alloy"
            )
            with open(audio_file_path, "rb") as audio_file:
                audio_data = audio_file.read()

        st.audio(audio_data, format="audio/mpeg", start_time=0)
|
|
|
|
|
|
|
|
|
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|