CR7CAD committed on
Commit
118cd25
·
verified ·
1 Parent(s): cd79461

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -60
app.py CHANGED
@@ -1,21 +1,48 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  from PIL import Image
4
  import os
5
- import torch
6
- from gtts import gTTS
7
  import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # function part
10
  # img2text
11
  def img2text(image_path):
12
  try:
13
- # Check if sentencepiece is installed
14
- try:
15
- import sentencepiece
16
- except ImportError:
17
- st.error("sentencepiece is not installed. Please install it with: pip install sentencepiece")
18
- return "Error: sentencepiece not installed"
19
 
20
  # Load the image-to-text model
21
  image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
@@ -33,13 +60,14 @@ def img2text(image_path):
33
  # text2story
34
  def text2story(text):
35
  # For now, just return the extracted text as the story
36
- # This function can be expanded later with more sophisticated story generation
37
  story_text = f"Here's a story based on the text: {text}"
38
  return story_text
39
 
40
- # text2audio using Google Text-to-Speech instead of transformers
41
  def text2audio(story_text):
42
  try:
 
 
43
  # Create a temporary file
44
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
45
  temp_audio_path = temp_audio.name
@@ -62,56 +90,24 @@ st.set_page_config(page_title="Your Image to Audio Story",
62
  st.header("Turn Your Image to Audio Story")
63
  st.subheader("Using Donut model for text extraction")
64
 
65
- uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
66
-
67
- if uploaded_file is not None:
68
- # Save the uploaded file temporarily
69
- bytes_data = uploaded_file.getvalue()
70
- image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
71
- with open(image_temp_path, "wb") as file:
72
- file.write(bytes_data)
73
-
74
- # Display the uploaded image
75
- st.image(uploaded_file, caption="Uploaded Image",
76
- use_column_width=True)
77
-
78
- # Stage 1: Image to Text
79
- with st.spinner('Processing img2text...'):
80
- extracted_text = img2text(image_temp_path)
81
- st.subheader("Extracted Text:")
82
- st.write(extracted_text)
83
 
84
- # Stage 2: Text to Story
85
- with st.spinner('Generating a story...'):
86
- story = text2story(extracted_text)
87
- st.subheader("Generated Story:")
88
- st.write(story)
89
 
90
- # Stage 3: Story to Audio data
91
- audio_file_path = None
92
- with st.spinner('Generating audio data...'):
93
- audio_file_path = text2audio(story)
 
 
94
 
95
- # Remove the temporary image file
96
- if os.path.exists(image_temp_path):
97
- os.remove(image_temp_path)
98
 
99
- # Play button
100
- if st.button("Play Audio"):
101
- if audio_file_path and os.path.exists(audio_file_path):
102
- # Play the generated audio
103
- with open(audio_file_path, "rb") as audio_file:
104
- audio_bytes = audio_file.read()
105
- st.audio(audio_bytes, format="audio/wav")
106
-
107
- # Clean up the audio file after playing
108
- try:
109
- os.remove(audio_file_path)
110
- except:
111
- pass
112
- else:
113
- st.warning("Audio generation failed. Playing a placeholder audio.")
114
- try:
115
- st.audio("kids_playing_audio.wav")
116
- except FileNotFoundError:
117
- st.error("Placeholder audio file not found. Audio playback is unavailable.")
 
1
  import streamlit as st
 
2
  from PIL import Image
3
  import os
 
 
4
  import tempfile
5
+ import subprocess
6
+ import sys
7
+
8
+ # Check for required dependencies and install if missing
9
+ def check_and_install_dependencies():
10
+ required_packages = {
11
+ "transformers": "transformers",
12
+ "sentencepiece": "sentencepiece",
13
+ "gtts": "gTTS"
14
+ }
15
+
16
+ missing_packages = []
17
+ for package, pip_name in required_packages.items():
18
+ try:
19
+ __import__(package)
20
+ except ImportError:
21
+ missing_packages.append((package, pip_name))
22
+
23
+ if missing_packages:
24
+ st.warning("Missing required dependencies. Please install them before continuing.")
25
+ for package, pip_name in missing_packages:
26
+ st.code(f"pip install {pip_name}", language="bash")
27
+
28
+ if st.button("Install Dependencies Automatically"):
29
+ with st.spinner("Installing dependencies..."):
30
+ for package, pip_name in missing_packages:
31
+ try:
32
+ subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
33
+ st.success(f"Successfully installed {pip_name}")
34
+ except Exception as e:
35
+ st.error(f"Failed to install {pip_name}: {str(e)}")
36
+ st.info("Please restart the application after installing dependencies.")
37
+ return False
38
+ return True
39
 
40
  # function part
41
  # img2text
42
  def img2text(image_path):
43
  try:
44
+ # Import here to ensure dependencies are checked first
45
+ from transformers import pipeline
 
 
 
 
46
 
47
  # Load the image-to-text model
48
  image_to_text_model = pipeline("image-to-text", model="naver-clova-ix/donut-base")
 
60
  # text2story
61
  def text2story(text):
62
  # For now, just return the extracted text as the story
 
63
  story_text = f"Here's a story based on the text: {text}"
64
  return story_text
65
 
66
+ # text2audio using Google Text-to-Speech
67
  def text2audio(story_text):
68
  try:
69
+ from gtts import gTTS
70
+
71
  # Create a temporary file
72
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
73
  temp_audio_path = temp_audio.name
 
90
  st.header("Turn Your Image to Audio Story")
91
  st.subheader("Using Donut model for text extraction")
92
 
93
+ # Check dependencies before proceeding
94
+ dependencies_ok = check_and_install_dependencies()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ if dependencies_ok:
97
+ uploaded_file = st.file_uploader("Select an Image...", type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'])
 
 
 
98
 
99
+ if uploaded_file is not None:
100
+ # Save the uploaded file temporarily
101
+ bytes_data = uploaded_file.getvalue()
102
+ image_temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
103
+ with open(image_temp_path, "wb") as file:
104
+ file.write(bytes_data)
105
 
106
+ # Display the uploaded image
107
+ st.image(uploaded_file, caption="Uploaded Image",
108
+ use_column_width=True)
109
 
110
+ # Stage 1: Image to Text
111
+ with st.spinner('Processing img2text...'):
112
+ extracted_text = img2text(image_temp_path)
113
+ st.subheader("Extracted Text:")