# Import all necessary libraries and don't forget to check out Dependencies
import functools
import random
import re

import easyocr
import nltk
import numpy as np
import pandas as pd
import pyperclip
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

nltk.download('stopwords')
nltk.download('punkt')
# Newer NLTK releases load the tokenizer data from 'punkt_tab' instead of
# 'punkt'; fetching both keeps word_tokenize working across versions.
nltk.download('punkt_tab')

CAPTION_MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"
END_OF_TEXT_TOKEN = "<|endoftext|>"
TRENDING_HASHTAGS_FILE = "hashies.txt"
TOP_TRENDING_COUNT = 5
MAX_DISPLAYED_HASHTAGS = 15

# Streamlit re-executes this script on every interaction, so heavyweight
# objects must be cached or they are rebuilt on each rerun. Prefer Streamlit's
# resource cache; fall back to older APIs / a plain in-process cache.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or functools.lru_cache(maxsize=None)
)


@_cache_resource
def _load_captioning_model():
    """Load the pretrained captioning model, feature extractor and tokenizer once."""
    loaded_model = VisionEncoderDecoderModel.from_pretrained(CAPTION_MODEL_NAME)
    loaded_extractor = ViTFeatureExtractor.from_pretrained(CAPTION_MODEL_NAME)
    loaded_tokenizer = AutoTokenizer.from_pretrained(CAPTION_MODEL_NAME)
    return loaded_model, loaded_extractor, loaded_tokenizer


@_cache_resource
def _get_ocr_reader():
    """Build the English EasyOCR reader once instead of on every OCR call."""
    return easyocr.Reader(['en'])


# Load the model-pretrained
model, feature_extractor, tokenizer = _load_captioning_model()

# Load NLTK stopwords for filtering
stop_words = set(stopwords.words('english'))


# Function to generate captions
def generate_captions(image):
    """Generate a caption for an image with the pretrained captioning model.

    Args:
        image: Anything ``PIL.Image.open`` accepts (path or file-like object),
            or an already opened ``PIL.Image.Image``.

    Returns:
        The decoded caption string with the end-of-text marker removed.
    """
    if not isinstance(image, Image.Image):
        image = Image.open(image)
    image = image.convert("RGB")
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to("cpu")
    output_ids = model.generate(pixel_values)
    generated_caption = tokenizer.decode(output_ids[0])
    return generated_caption.replace(END_OF_TEXT_TOKEN, "")


# kinda-Function easyocr to extract text from the image
def image_text(image):
    """Run OCR on an image and turn each detected text snippet into a hashtag.

    Args:
        image: An image convertible with ``np.array`` (e.g. a PIL image).

    Returns:
        A list of strings, one per non-empty detected snippet: lowercased,
        with spaces removed and prefixed with ``#``.
    """
    img_np = np.array(image)
    detections = _get_ocr_reader().readtext(img_np)
    hashtags = []
    for detection in detections:
        # The recognized text is read from index 1 of each detection.
        cleaned = detection[1].strip().lower().replace(" ", "")
        if cleaned:  # skip empty detections so no bare '#' is produced
            hashtags.append('#' + cleaned)
    return hashtags


# Function to extract keywords from a given text
def extract_keywords(caption):
    """Tokenize ``caption`` and return its lowercased non-stopword tokens."""
    lowered = (token.lower() for token in word_tokenize(caption))
    return [token for token in lowered if token not in stop_words]


# Function to calculate cosine similarity between two strings
def calculate_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two strings.

    Returns 0.0 when the two texts contain no token the vectorizer can use
    (``TfidfVectorizer`` raises ``ValueError`` for an empty vocabulary).
    """
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        return 0.0
    return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]


# Add hashtags to keywords, which have been generated from image captioing
def add_hashtags(keywords):
    """Return each keyword lowercased and prefixed with ``#``."""
    return ['#' + keyword.lower() for keyword in keywords]


# function to get and add trending Hashtags
def trending_hashtags(caption):
    """Pick the hashtags from ``hashies.txt`` most similar to ``caption``.

    The file is read as comma-separated hashtags. Each hashtag is scored by
    TF-IDF cosine similarity against the caption's keywords.

    Args:
        caption: Caption text to compare against.

    Returns:
        Up to ``TOP_TRENDING_COUNT`` distinct hashtags, most similar first,
        with surrounding single quotes stripped.

    Raises:
        FileNotFoundError: If ``hashies.txt`` is not in the working directory.
    """
    with open(TRENDING_HASHTAGS_FILE, "r", encoding="utf-8") as file:
        hashtags_string = file.read()

    # Split the hashtags by commas, trim whitespace and drop empty entries
    # (e.g. those produced by a trailing comma).
    candidates = [hashtag.strip() for hashtag in hashtags_string.split(',')]
    candidates = [hashtag for hashtag in candidates if hashtag]

    caption_text = ' '.join(extract_keywords(caption))
    similarities = [
        calculate_similarity(caption_text, ' '.join(extract_keywords(hashtag)))
        for hashtag in candidates
    ]

    # Sort trending hashtags based on similarity in descending order
    ranked = [hashtag for _, hashtag in sorted(zip(similarities, candidates), reverse=True)]

    # De-duplicate while keeping the ranking; a set would discard the order.
    unique_ranked = list(dict.fromkeys(word.strip("'") for word in ranked))
    return unique_ranked[:TOP_TRENDING_COUNT]


# Streamlit app Creation
def app():
    """Render the page: upload an image, then display it with suggested hashtags."""
    st.title(
        'Have a :green[Bueatiful pic!] Looking for :orange[Trending Hashtags '
        'to post it on your social handle?]. Here is some Help'
    )

    # create file uploader
    uploaded_file = st.file_uploader(
        "Upload Picture of your wish!, :violet[magic on the Way! ]",
        type=["jpg", "jpeg", "png"],
    )

    # nothing to do until a file has been uploaded
    if uploaded_file is None:
        return

    # load the image once and reuse it for captioning, OCR and display
    image = Image.open(uploaded_file).convert("RGB")

    # Image Captions; tokens without any letter or digit (pure punctuation)
    # are dropped so they do not become hashtags such as '#.'
    caption = generate_captions(image)
    keywords = [
        keyword for keyword in extract_keywords(caption)
        if any(char.isalnum() for char in keyword)
    ]
    hashtags = add_hashtags(keywords)

    # Text Captions from image
    extracted_text = image_text(image)

    # Final Hashtags Generation
    web_hashtags = trending_hashtags(caption)
    candidates = hashtags + extracted_text + web_hashtags

    # De-duplicate and drop tags ending in a digit BEFORE truncating, so the
    # display limit is filled whenever enough distinct tags exist.
    eligible = [
        tag for tag in dict.fromkeys(candidates)
        if not re.search(r'\d$', tag)
    ]
    # Random selection in random order, capped at the display limit
    combined_hashtags = random.sample(eligible, min(len(eligible), MAX_DISPLAYED_HASHTAGS))

    # display the image
    st.image(image, caption='The Uploaded File')
    hashtags_text = "\n ".join(combined_hashtags)
    st.write("Magical hashies have arrived* :sparkles: ")
    st.write(hashtags_text)


# run the app
if __name__ == '__main__':
    app()