import streamlit as st
import numpy as np
import pickle
from keras.models import Model, load_model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.sequence import pad_sequences
from PIL import Image
import requests
from io import BytesIO
def set_bg_hack_url():
'''
    Set a full-page background image for the Streamlit app from a fixed Unsplash URL.
'''
st.markdown(
f"""
<style>
.stApp {{
background-image: url("https://images.unsplash.com/photo-1500468415400-191607326b6a?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=2070&q=80");
background-size: 100% 100%;
background-repeat: no-repeat;
background-position: center center;
width: 100%;
height: 100%;
}}
</style>
""",
unsafe_allow_html=True
)
# Apply the page background (the function returns None, so there is nothing to print)
set_bg_hack_url()
# Preprocess an uploaded image for VGG16
def preprocess_image(uploaded_image):
    image = Image.open(uploaded_image)
    image = image.convert('RGB')  # Drop alpha / grayscale channels so the array is always (224, 224, 3)
    image = image.resize((224, 224))  # Resize the image to match the VGG16 input size
    image = np.array(image)
    image = preprocess_input(image)
    return image
# Preprocess an image fetched from a URL for VGG16
def preprocess_image_url(image_url):
    response = requests.get(image_url, timeout=10)
    response.raise_for_status()  # Fail early on a bad URL or HTTP error
    image = Image.open(BytesIO(response.content))
    image = image.convert('RGB')  # Drop alpha / grayscale channels
    image = image.resize((224, 224))  # Resize the image to match the VGG16 input size
    image = np.array(image)
    image = preprocess_input(image)
    return image
# Load the trained VGG16-based captioning model
model_1 = load_model("best_mode_vgg_40.h5", compile=False)
# Load the tokenizer
with open("tokenizer40.pickle", 'rb') as handle:
tokenizer = pickle.load(handle)
# Map a predicted word index back to its word in the tokenizer vocabulary
def idx_to_word(integer, tokenizer):
for word, index in tokenizer.word_index.items():
if index == integer:
return word
return None
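
# Optional sketch (not used by main() below): recent Keras Tokenizer versions also expose
# an index_word dict, which turns this lookup into a constant-time dictionary access instead
# of scanning word_index on every decoding step. This assumes the pickled tokenizer was
# created with a version that populates index_word.
def idx_to_word_fast(integer, tokenizer):
    # Reverse lookup via the prebuilt index -> word mapping; returns None for unknown indices
    return tokenizer.index_word.get(integer)
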
# Function to generate captions
def predict_caption(model, image, tokenizer, max_length):
# add start tag for generation process
in_text = 'startseq'
# iterate over the max length of sequence
for i in range(max_length):
# encode input sequence
sequence = tokenizer.texts_to_sequences([in_text])[0]
# pad the sequence
        sequence = pad_sequences([sequence], maxlen=max_length)
# predict next word
yhat = model.predict([image, sequence], verbose=0)
# get index with high probability
yhat = np.argmax(yhat)
# convert index to word
word = idx_to_word(yhat, tokenizer)
# stop if word not found
if word is None:
break
# append word as input for generating the next word
in_text += " " + word
# stop if we reach end tag
if word == 'endseq':
break
return in_text
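
# Optional performance sketch (a suggestion, not wired into main() below): main() currently
# rebuilds VGG16 inside the button handler on every click, and the caption model and tokenizer
# are reloaded on every Streamlit rerun. Assuming a Streamlit version that provides
# st.cache_resource (1.18+), the feature extractor can be created once and reused; the same
# decorator could also wrap the load_model()/pickle.load() calls above.
@st.cache_resource
def get_vgg_feature_extractor():
    base = VGG16()
    # Keep the penultimate (fc2) layer, which yields the 4096-dim features the caption model expects
    return Model(inputs=base.inputs, outputs=base.layers[-2].output)
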
def main():
st.title("Image Caption Generator 📷 ➡️ 📝")
# Choose an input option: Upload or URL
input_option = st.radio("Select an input option:", ("Upload Image", "Image URL"))
if input_option == "Upload Image":
# Upload an image
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
if uploaded_image is not None:
image = Image.open(uploaded_image)
st.image(image, caption="Uploaded Image", use_column_width=True)
# Generate caption button
if st.button("Generate Caption"):
# Preprocess the uploaded image
new_image = preprocess_image(uploaded_image)
# Generate features for the new image using the pre-trained VGG16 model
vgg_model = VGG16()
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
new_image_features = vgg_model.predict(np.array([new_image]), verbose=0)
# Predict caption for the new image
generated_caption = predict_caption(model_1, new_image_features, tokenizer, max_length=35)
generated_caption = generated_caption.replace('startseq', '').replace('endseq', '').strip()
generated_caption = generated_caption.capitalize()
# Display the generated caption
                st.markdown('#### Predicted Caption:')
st.markdown(f"<p style='font-size:25px'><i>{generated_caption}</i>.</p>",
unsafe_allow_html=True)
elif input_option == "Image URL":
# Input image URL
image_url = st.text_input("Enter the image URL:")
if image_url:
# Display the image
image = Image.open(BytesIO(requests.get(image_url).content))
st.image(image, caption="Image", use_column_width=True)
# Generate caption button
if st.button("Generate Caption"):
# Preprocess the image from URL
new_image = preprocess_image_url(image_url)
# Generate features for the new image using the pre-trained VGG16 model
vgg_model = VGG16()
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
new_image_features = vgg_model.predict(np.array([new_image]), verbose=0)
# Generate caption for the new image
generated_caption = predict_caption(model_1, new_image_features, tokenizer, max_length=35)
generated_caption = generated_caption.replace('startseq', '').replace('endseq', '').strip()
generated_caption = generated_caption.capitalize()
# Display the generated caption
                st.markdown('#### Predicted Caption:')
st.markdown(f"<p style='font-size:25px'><i>{generated_caption}</i>.</p>",
unsafe_allow_html=True)
if __name__ == "__main__":
main()
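
# To run the app locally (assuming streamlit, tensorflow/keras, pillow and requests are
# installed and best_mode_vgg_40.h5 / tokenizer40.pickle sit next to this script):
#     streamlit run app.py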
# import streamlit as st
# import numpy as np
# import pickle
# import tensorflow as tf
# from tensorflow import keras
# from keras.models import Model
# from keras.applications.vgg16 import VGG16, preprocess_input
# from keras.preprocessing.image import load_img, img_to_array
# from keras.models import load_model
# from keras.preprocessing.sequence import pad_sequences
# from PIL import Image
# import requests
# from io import BytesIO
# import pyttsx3
# import base64
# def set_bg_hack_url():
# '''
# A function to unpack an image from url and set as bg.
# Returns
# -------
# The background.
# '''
# st.markdown(
# f"""
# <style>
# .stApp {{
# background-image: url("https://images.unsplash.com/photo-1500468415400-191607326b6a?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=2070&q=80");
# background-size: 100% 100%;
# background-repeat: no-repeat;
# background-position: center center;
# width: 100%;
# height: 100%;
# }}
# </style>
# """,
# unsafe_allow_html=True
# )
# set_bg_hack_url()
# #
# #
# # # Preprocess the uploaded image
# def preprocess_image(uploaded_image):
# image = Image.open(uploaded_image)
# image = image.resize((224, 224)) # Resize the image to match VGG16 input size
# image = np.array(image)
# image = preprocess_input(image)
# return image
# # Preprocess the image from URL
# def preprocess_image_url(image_url):
# response = requests.get(image_url)
# image = Image.open(BytesIO(response.content))
# image = image.resize((224, 224)) # Resize the image to match VGG16 input size
# image = np.array(image)
# image = preprocess_input(image)
# return image
# # Load the trained VGG16-based captioning model
# model_1 = load_model("best_mode_vgg_40.h5", compile=False)
# # Load the tokenizer
# with open("tokenizer40.pickle", 'rb') as handle:
# tokenizer = pickle.load(handle)
# def idx_to_word(integer, tokenizer):
# for word, index in tokenizer.word_index.items():
# if index == integer:
# return word
# return None
# # Function to generate captions
# def predict_caption(model, image, tokenizer, max_length):
# # add start tag for generation process
# in_text = 'startseq'
# # iterate over the max length of sequence
# for i in range(max_length):
# # encode input sequence
# sequence = tokenizer.texts_to_sequences([in_text])[0]
# # pad the sequence
# sequence = pad_sequences([sequence], max_length)
# # predict next word
# yhat = model.predict([image, sequence], verbose=0)
# # get index with high probability
# yhat = np.argmax(yhat)
# # convert index to word
# word = idx_to_word(yhat, tokenizer)
# # stop if word not found
# if word is None:
# break
# # append word as input for generating the next word
# in_text += " " + word
# # stop if we reach end tag
# if word == 'endseq':
# break
# return in_text
# def generate_audio(caption):
# engine = pyttsx3.init()
# engine.save_to_file(caption, 'caption_audio.mp3')
# engine.runAndWait()
# def main():
# st.title("Image Caption Generator 📷 ➡️ 📝")
# # Choose an input option: Upload or URL
# input_option = st.radio("Select an input option:", ("Upload Image", "Image URL"))
# if input_option == "Upload Image":
# # Upload an image
# uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
# if uploaded_image is not None:
# image = Image.open(uploaded_image)
# st.image(image, caption="Uploaded Image", use_column_width=True)
# # Generate caption button
# if st.button("Generate Caption"):
# # Preprocess the uploaded image
# new_image = preprocess_image(uploaded_image)
# # Generate features for the new image using the pre-trained VGG16 model
# vgg_model = VGG16()
# vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
# new_image_features = vgg_model.predict(np.array([new_image]), verbose=0)
# # Predict caption for the new image
# generated_caption = predict_caption(model_1, new_image_features, tokenizer, max_length=35)
# generated_caption = generated_caption.replace('startseq', '').replace('endseq', '').strip()
# generated_caption = generated_caption.capitalize()
# # Generate audio from the caption
# generate_audio(generated_caption)
# # Display the generated caption
# st.markdown('#### Predicted Caption:')
# st.markdown(f"<p style='font-size:25px'><i>{generated_caption}</i>.</p>",
# unsafe_allow_html=True)
# # Display the audio
# st.audio('caption_audio.mp3')
# elif input_option == "Image URL":
# # Input image URL
# image_url = st.text_input("Enter the image URL:")
# if image_url:
# # Display the image
# image = Image.open(BytesIO(requests.get(image_url).content))
# st.image(image, caption="Image", use_column_width=True)
# # Generate caption button
# if st.button("Generate Caption"):
# # Preprocess the image from URL
# new_image = preprocess_image_url(image_url)
# # Generate features for the new image using the pre-trained VGG16 model
# vgg_model = VGG16()
# vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
# new_image_features = vgg_model.predict(np.array([new_image]), verbose=0)
# # Generate caption for the new image
# generated_caption = predict_caption(model_1, new_image_features, tokenizer, max_length=35)
# generated_caption = generated_caption.replace('startseq', '').replace('endseq', '').strip()
# generated_caption = generated_caption.capitalize()
# # Generate audio from the caption
# generate_audio(generated_caption)
# # Display the generated caption
# st.markdown('#### Predicted Caption:')
# st.markdown(f"<p style='font-size:25px'><i>{generated_caption}</i>.</p>",
# unsafe_allow_html=True)
# # Display the audio
# st.audio('caption_audio.mp3')
# if __name__ == "__main__":
# main()
# print(f"Streamlit version: {st.__version__}")