"""Streamlit app: caption an image (or accept text) and list similar reviews.

The caption (or the text typed by the user) is embedded with a sentence
transformer and matched against review texts stored in a FAISS L2 index.
"""

from io import BytesIO

from transformers import pipeline
import streamlit as st
from PIL import Image
import requests
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import pandas as pd

# Location of the reviews CSV; it must contain a 'reviewText' column.
REVIEWS_CSV_PATH = '/path/to/your/amazon_reviews.csv'

# Upper bound (seconds) for downloading an image so a dead host cannot hang the app.
REQUEST_TIMEOUT_SECONDS = 10

# Streamlit re-executes this script on every widget interaction. Cache the
# heavyweight resources when the installed Streamlit offers st.cache_resource;
# on versions without it, fall back to a no-op decorator (original behavior).
_cache_resource = getattr(st, "cache_resource", None) or (lambda func: func)


@_cache_resource
def _load_captioning_pipeline():
    """Return the image-to-text pipeline used to caption images."""
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


@_cache_resource
def _load_sentence_model():
    """Return the sentence transformer used to embed reviews and queries."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Initialize the image-to-text pipeline
image_to_text = _load_captioning_pipeline()

# Initialize the sentence transformer model for embeddings
sentence_model = _load_sentence_model()


def load_image_from_url(url):
    """Download *url* and decode it as a PIL image.

    Args:
        url: Address of the image to fetch.

    Returns:
        The decoded ``PIL.Image.Image``, or ``None`` when the download or the
        decoding fails (the error is reported to the user via ``st.error``).
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Surface HTTP errors (404, 500, ...) instead of trying to decode an
        # error page as an image.
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:  # UI boundary: report any failure, never crash the app
        st.error(f"Error loading image from URL: {e}")
        return None


@_cache_resource
def load_dataset_and_create_index(csv_path=REVIEWS_CSV_PATH):
    """Load the reviews CSV and build a FAISS L2 index over their embeddings.

    Args:
        csv_path: Path of a CSV file with a 'reviewText' column.

    Returns:
        A ``(faiss_index, review_texts)`` tuple; position ``i`` in the index
        corresponds to ``review_texts[i]``.

    Raises:
        ValueError: If the file contains no non-null review text.
    """
    df = pd.read_csv(csv_path)
    # Cast to str so stray numeric cells do not break the encoder.
    review_texts = df['reviewText'].dropna().astype(str).tolist()
    if not review_texts:
        raise ValueError(f"No usable 'reviewText' entries found in {csv_path}")

    # FAISS expects float32 input.
    review_embeddings = np.asarray(
        sentence_model.encode(review_texts), dtype=np.float32
    )
    dimension = review_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(review_embeddings)
    return faiss_index, review_texts


faiss_index, review_texts = load_dataset_and_create_index()


def find_top_n_similar_reviews(query, faiss_index, review_texts, top_n=3):
    """Return up to *top_n* reviews closest to *query* in embedding space.

    Args:
        query: Text to embed and search for.
        faiss_index: FAISS index built over the review embeddings.
        review_texts: Review strings aligned with the index positions.
        top_n: Maximum number of reviews to return.

    Returns:
        A list of review strings, nearest first. It is shorter than *top_n*
        when fewer reviews are available.
    """
    k = min(top_n, len(review_texts))
    if k <= 0:
        return []
    query_embedding = np.asarray(sentence_model.encode([query]), dtype=np.float32)
    _, indices = faiss_index.search(query_embedding, k)
    # FAISS pads missing neighbours with -1, which would otherwise silently
    # select the last review through negative indexing.
    return [review_texts[i] for i in indices[0] if i >= 0]


st.title('Image Captioning and Review Visualization Application')

input_type = st.radio("Select input type:", ("Upload Image", "Image URL", "Text"))

image = None
text_input = ""

if input_type == "Upload Image":
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)
elif input_type == "Image URL":
    image_url = st.text_input("Enter the image URL here:", "")
    if image_url:
        image = load_image_from_url(image_url)
elif input_type == "Text":
    text_input = st.text_area("Enter text here:", "")

if st.button('Generate Caption'):
    result_text = ""
    caption_failed = False

    if input_type in ("Upload Image", "Image URL") and image is not None:
        with st.spinner("Generating caption..."):
            # Pass the already-decoded PIL image: the pipeline does not accept
            # Streamlit's uploaded-file object, and this also avoids fetching
            # a URL a second time.
            result = image_to_text(image)
        if result:
            result_text = result[0]['generated_text']
        caption_failed = not result_text
    elif input_type == "Text" and text_input:
        result_text = text_input

    if result_text:
        st.success(f'Generated Caption: {result_text}')
        similar_reviews = find_top_n_similar_reviews(result_text, faiss_index, review_texts)
        st.write("Similar Reviews Based on the Caption:")
        for review in similar_reviews:
            st.write(review)
    elif caption_failed:
        # Report the failure instead of presenting it as a caption and
        # searching reviews for the error message.
        st.error("Failed to generate caption.")
    else:
        st.error("Please provide input.")