from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import urllib.request
from transformers import pipeline
import streamlit as st
import os
import subprocess
import chromedriver_autoinstaller
 
# Headless Chrome configuration; the sandbox and /dev/shm flags are the ones
# commonly required when Chrome runs inside a container.
chrome_options = webdriver.ChromeOptions()
for _chrome_flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(_chrome_flag)

# Streamlit re-executes this script from the top on every widget interaction.
# Creating the WebDriver unconditionally therefore launched a new Chrome
# process on each rerun and never quit any of them. Keeping the driver in the
# per-session state means a browser session starts Chrome only once.
# NOTE(review): the driver is still not quit when the session ends.
if "_selenium_driver" not in st.session_state:
    # Downloads a matching chromedriver if needed and puts it on PATH; this
    # has to happen before webdriver.Chrome() is constructed.
    chromedriver_autoinstaller.install()
    st.session_state["_selenium_driver"] = webdriver.Chrome(options=chrome_options)

driver = st.session_state["_selenium_driver"]

def determine_overall_sentiment(sentiments_list):
    """Return the sentiment label that wins the most analysed texts.

    Args:
        sentiments_list: One entry per analysed text. Each entry is a list of
            score lists, and each score list holds one dict per label with
            the keys ``'label'`` and ``'score'`` (the nesting produced by the
            sentiment pipeline when all scores are requested).

    Returns:
        ``'positive'``, ``'neutral'`` or ``'negative'``. Ties, including the
        case where there is nothing to count, resolve in that order.

    Raises:
        KeyError: If a winning label is not one of the three known labels.
    """
    label_counts = {'positive': 0, 'neutral': 0, 'negative': 0}

    for per_text_result in sentiments_list:
        for score_list in per_text_result:
            if not score_list:
                continue
            # Every score list contains *all* labels, so counting each dict
            # (as the previous version did) always produced a three-way tie.
            # Only the highest-scoring label is the text's actual sentiment.
            winner = max(score_list, key=lambda entry: entry['score'])
            label_counts[winner['label']] += 1

    # max() returns the first key with the largest count, which gives the
    # documented tie-break order of the dict above.
    return max(label_counts, key=label_counts.get)

def _extract_video_id(href):
    """Return the ``v`` query parameter of a YouTube watch URL, or None.

    Shorts links and links without a ``v`` parameter yield None.
    """
    from urllib.parse import parse_qs, urlparse

    if not href or "youtube.com/shorts" in href:
        return None
    values = parse_qs(urlparse(href).query).get("v")
    return values[0] if values else None


def get_sentiment_from_yt(query):
    """Search YouTube for *query* and analyse the sentiment of the thumbnails.

    The thumbnails of the found videos are downloaded to
    ``{query}/{video_id}.jpg``, captioned, grammar-corrected and classified.

    Args:
        query: Free-text search string. It is also used verbatim as the
            directory the thumbnails are written to.

    Returns:
        A tuple ``(links, descriptions, sentiments, overall_sentiment)``.
        ``links`` holds the ids of the videos whose thumbnail could be
        downloaded; ``descriptions`` and ``sentiments`` are index-aligned
        with it. All three are empty for a blank query or when nothing could
        be found or downloaded.
    """
    from urllib.parse import quote_plus

    from selenium.common.exceptions import TimeoutException

    # A blank query has nothing to search for and would turn the thumbnail
    # paths into root-relative "/<id>.jpg".
    if not query or not query.strip():
        return [], [], [], determine_overall_sentiment([])

    # NOTE(review): `query` is user input and is used unescaped as a
    # directory name below; values such as "../x" escape the working
    # directory. The caller builds the same path, so it is kept as is here.
    driver.get(f"https://www.youtube.com/results?search_query={quote_plus(query)}")

    # The result list is rendered by JavaScript, so poll until at least one
    # title element exists instead of reading the DOM straight away.
    title_xpath = '//*[@id="video-title"]'
    try:
        user_data = WebDriverWait(driver, 10).until(
            lambda drv: drv.find_elements(By.XPATH, title_xpath)
        )
    except TimeoutException:
        user_data = []

    # Keep each video once, in result order.
    video_ids = []
    for element in user_data:
        video_id = _extract_video_id(element.get_attribute("href"))
        if video_id is not None and video_id not in video_ids:
            video_ids.append(video_id)

    links = []
    if video_ids:
        os.makedirs(query, exist_ok=True)
    for video_id in video_ids:
        try:
            urllib.request.urlretrieve(
                f"https://i.ytimg.com/vi/{video_id}/maxresdefault.jpg",
                f"{query}/{video_id}.jpg",
            )
        except OSError:
            # Not every video has a max-resolution thumbnail (HTTP 404);
            # skip it rather than abort the whole run. HTTPError and
            # URLError are OSError subclasses.
            continue
        links.append(video_id)

    if not links:
        # Nothing to analyse, so avoid loading three large models.
        return [], [], [], determine_overall_sentiment([])

    # NOTE: the models are loaded on every call and pinned to device 0.
    pipe_Image2Text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device = 0)
    pipe_GrammarCorrection = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis", device = 0)
    pipe_SentimentAnalysis = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", return_all_scores=True, device = 0)

    descriptions = []
    sentiments = []
    for video_id in links:
        caption = pipe_Image2Text(f"{query}/{video_id}.jpg")[0]['generated_text']
        corrected = pipe_GrammarCorrection(caption)[0]['generated_text']
        descriptions.append(corrected)
        sentiments.append(pipe_SentimentAnalysis(corrected))

    overall_sentiment = determine_overall_sentiment(sentiments)

    return links, descriptions, sentiments, overall_sentiment

# --- Streamlit page: one text box, one button, results rendered below. ---
st.title("YouTube Sentiment Analysis")

query = st.text_input("Enter YouTube search query:")
if st.button("Get Sentiments"):
    if not query.strip():
        # The query doubles as the thumbnail directory name, so a blank one
        # would produce root-relative paths such as "/<id>.jpg".
        st.warning("Please enter a search query.")
    else:
        links, descriptions, sentiments, overall_sentiment = get_sentiment_from_yt(query)
        st.write(f"Overall Sentiment: {overall_sentiment}")
        st.write("Thumbnails, Descriptions, and Sentiments:")
        # zip() keeps the three result lists in lockstep and cannot index
        # past the end of a shorter one.
        results = zip(links, descriptions, sentiments)
        for number, (video_id, description, sentiment) in enumerate(results, start=1):
            thumbnail_path = f"{query}/{video_id}.jpg"
            # st.image raises on a missing file; show the text regardless.
            if os.path.exists(thumbnail_path):
                st.image(thumbnail_path, caption=f"Video {number}")
            st.write(f"**Description {number}:** {description}")
            st.write(f"**Sentiments {number}:** {sentiment}")
            st.write("---")