import streamlit as st from PIL import Image from predictions import get_predictions def main(): st.title("Image Whisper App") uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"]) if uploaded_image is not None: st.subheader("Uploaded Image") st.image(uploaded_image, use_column_width=True) if st.button("Submit"): processed_image, text, audio = get_predictions(uploaded_image) st.subheader("Output image with predicted instances") st.image(processed_image, use_column_width=True) st.subheader("Textual Description") st.write(text, "Narration in Text") st.subheader("Audio Narration") if isinstance(audio, tuple): sample_rate, audio_data = audio st.audio(audio_data, format='audio/wav', sample_rate=sample_rate) else: st.audio(audio, format='audio/wav') if __name__ == '__main__': main()