SameerR007 committed on
Commit
7873a48
1 Parent(s): 5c1f193

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -46,8 +46,9 @@ def predict_caption(model, image, tokenizer, max_length):
46
  return in_text
47
  from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
48
  from tensorflow.keras.models import Model
49
- import pyttsx3
50
- engine=pyttsx3.init()
 
51
  vgg_model = VGG16()
52
  vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
53
  from tensorflow.keras.preprocessing.image import img_to_array
@@ -58,6 +59,7 @@ if(uploaded_image!=None):
58
  display_image=Image.open(uploaded_image)
59
  st.image(display_image)
60
  if st.button("Caption"):
 
61
  display_image=display_image.resize((224,224))
62
  image = img_to_array(display_image)
63
  image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
@@ -65,5 +67,8 @@ if(uploaded_image!=None):
65
  feature = vgg_model.predict(image, verbose=0)
66
  final=predict_caption(model, feature, tokenizer, max_length)
67
  final_output=((" ").join(final.split(" ")[1:len(final.split(" "))-1]))
68
- engine.say(final_output)
69
- engine.runAndWait()
 
 
 
 
46
  return in_text
47
  from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
48
  from tensorflow.keras.models import Model
49
+ from gtts import gTTS
50
+ from io import BytesIO
51
+ sound_file = BytesIO()
52
  vgg_model = VGG16()
53
  vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
54
  from tensorflow.keras.preprocessing.image import img_to_array
 
59
  display_image=Image.open(uploaded_image)
60
  st.image(display_image)
61
  if st.button("Caption"):
62
+ st.text("Please be patient...")
63
  display_image=display_image.resize((224,224))
64
  image = img_to_array(display_image)
65
  image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
 
67
  feature = vgg_model.predict(image, verbose=0)
68
  final=predict_caption(model, feature, tokenizer, max_length)
69
  final_output=((" ").join(final.split(" ")[1:len(final.split(" "))-1]))
70
+ tts = gTTS(final_output, lang='en')
71
+ tts.write_to_fp(sound_file)
72
+ st.text("Output:")
73
+ st.text(final_output)
74
+ st.audio(sound_file)