"""Streamlit app: sentiment of YouTube search-result thumbnails.

For a search query the app scrapes the YouTube results page with headless
Chrome, downloads each video's thumbnail, captions it with an image-to-text
model, cleans the caption with a grammar-correction model, and classifies the
caption's sentiment.
"""
import os
import re
import subprocess
import urllib.error
import urllib.parse
import urllib.request
from typing import Any, Dict, List, Optional, Tuple

import chromedriver_autoinstaller
import streamlit as st
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from transformers import pipeline

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chromedriver_autoinstaller.install()
# NOTE: no module-level driver is created here. Streamlit re-executes this
# script on every interaction, so a browser opened at import time would be
# launched again on each rerun and never closed. The browser is instead opened
# per search and always quit (see _search_video_ids).


def determine_overall_sentiment(
    sentiments_list: List[List[List[Dict[str, Any]]]],
) -> str:
    """Return the majority sentiment label across all analysed texts.

    Args:
        sentiments_list: One entry per analysed text, each being the raw
            output of the sentiment pipeline: a list of score lists, where a
            score list holds one ``{'label': ..., 'score': ...}`` dict per
            candidate label.

    Returns:
        The label that wins the most texts. Ties resolve in the order
        'positive', 'neutral', 'negative'. Returns 'neutral' when there is
        nothing to count.
    """
    label_counts = {'positive': 0, 'neutral': 0, 'negative': 0}
    for per_text in sentiments_list:
        for scores in per_text:
            if not scores:
                continue
            # The classifier reports a score for *every* label, so counting
            # each label would always tie. Only the top-scoring label of each
            # text gets a vote.
            winner = max(scores, key=lambda s: s.get('score', 0.0))
            label = winner['label']
            label_counts[label] = label_counts.get(label, 0) + 1

    if not any(label_counts.values()):
        return 'neutral'
    return max(label_counts, key=label_counts.get)


def _thumbnail_dir(query: str) -> str:
    """Map a search query to a safe, non-empty directory name."""
    # The query is user input: replace path separators and other special
    # characters so it cannot escape the working directory, and never return
    # an empty name (which would resolve to the filesystem root).
    safe = re.sub(r"[^\w\-. ]", "_", query).strip(" .")
    return safe or "_"


def _thumbnail_path(query: str, video_id: str) -> str:
    """Return the local file path of a video's thumbnail for this query."""
    return os.path.join(_thumbnail_dir(query), f"{video_id}.jpg")


def _extract_video_id(href: Optional[str]) -> Optional[str]:
    """Return the ``v`` query parameter of a watch URL, or None."""
    if not href or "youtube.com/shorts" in href:
        return None
    values = urllib.parse.parse_qs(urllib.parse.urlparse(href).query).get("v")
    if not values:
        return None
    video_id = values[0]
    # The id is later embedded in a URL and a file name; reject anything that
    # is not a plain token.
    if not re.fullmatch(r"[A-Za-z0-9_-]+", video_id):
        return None
    return video_id


def _search_video_ids(query: str) -> List[str]:
    """Open the YouTube results page for *query* and collect video ids."""
    search_url = (
        "https://www.youtube.com/results?search_query="
        + urllib.parse.quote_plus(query)
    )
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(search_url)
        try:
            # Poll until at least one title element is present; an empty list
            # is falsy, so the wait keeps retrying until the timeout.
            titles = WebDriverWait(driver, 10).until(
                lambda d: d.find_elements(By.XPATH, '//*[@id="video-title"]')
            )
        except TimeoutException:
            titles = []
        hrefs = [title.get_attribute("href") for title in titles]
    finally:
        driver.quit()

    video_ids = (_extract_video_id(href) for href in hrefs)
    return [video_id for video_id in video_ids if video_id is not None]


def _download_thumbnails(query: str, video_ids: List[str]) -> List[str]:
    """Download thumbnails and return the ids whose download succeeded."""
    os.makedirs(_thumbnail_dir(query), exist_ok=True)
    downloaded = []
    for video_id in video_ids:
        url = f"https://i.ytimg.com/vi/{video_id}/maxresdefault.jpg"
        try:
            urllib.request.urlretrieve(url, _thumbnail_path(query, video_id))
        except urllib.error.URLError:
            # Best effort: a failed download (e.g. an HTTP error for this
            # thumbnail) skips the video instead of aborting the whole run.
            continue
        downloaded.append(video_id)
    return downloaded


def get_sentiment_from_yt(
    query: str,
) -> Tuple[List[str], List[str], List[Any], str]:
    """Analyse the thumbnails of the YouTube search results for *query*.

    Args:
        query: Free-text YouTube search query.

    Returns:
        A tuple ``(links, descriptions, sentiments, overall_sentiment)``.
        ``links`` holds the ids of the videos whose thumbnail was downloaded;
        ``descriptions`` and ``sentiments`` are index-aligned with it and hold
        the grammar-corrected caption and the raw sentiment-pipeline output
        for each thumbnail. ``overall_sentiment`` is the majority label.
    """
    links = _download_thumbnails(query, _search_video_ids(query))
    if not links:
        # Nothing to analyse: skip loading the models.
        return [], [], [], determine_overall_sentiment([])

    pipe_Image2Text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
        device=0,
    )
    pipe_GrammarCorrection = pipeline(
        "text2text-generation",
        model="pszemraj/flan-t5-large-grammar-synthesis",
        device=0,
    )
    pipe_SentimentAnalysis = pipeline(
        "text-classification",
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
        return_all_scores=True,
        device=0,
    )

    descriptions = []
    sentiments = []
    for video_id in links:
        caption = pipe_Image2Text(_thumbnail_path(query, video_id))[0]['generated_text']
        corrected = pipe_GrammarCorrection(caption)[0]['generated_text']
        descriptions.append(corrected)
        sentiments.append(pipe_SentimentAnalysis(corrected))

    overall_sentiment = determine_overall_sentiment(sentiments)
    return links, descriptions, sentiments, overall_sentiment


st.title("YouTube Sentiment Analysis")
query = st.text_input("Enter YouTube search query:")
if st.button("Get Sentiments"):
    if not query.strip():
        st.warning("Please enter a search query.")
    else:
        links, descriptions, sentiments, overall_sentiment = get_sentiment_from_yt(query)
        if not links:
            st.warning("No video thumbnails could be retrieved for this query.")
        else:
            st.write(f"Overall Sentiment: {overall_sentiment}")
            st.write("Thumbnails, Descriptions, and Sentiments:")
            results = zip(links, descriptions, sentiments)
            for i, (video_id, description, sentiment) in enumerate(results, start=1):
                st.image(_thumbnail_path(query, video_id), caption=f"Video {i}")
                st.write(f"**Description {i}:** {description}")
                st.write(f"**Sentiments {i}:** {sentiment}")
                st.write("---")