|
import os
import subprocess
import urllib.error
import urllib.request

import chromedriver_autoinstaller
import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from transformers import pipeline
|
|
|
chrome_options = webdriver.ChromeOptions() |
|
chrome_options.add_argument('--headless') |
|
chrome_options.add_argument('--no-sandbox') |
|
chrome_options.add_argument('--disable-dev-shm-usage') |
|
chromedriver_autoinstaller.install() |
|
|
|
driver = webdriver.Chrome(options=chrome_options) |
|
|
|
def determine_overall_sentiment(sentiments_list): |
|
flat_sentiments_list = [sentiment for sublist in sentiments_list for sentiment_list in sublist for sentiment in sentiment_list] |
|
|
|
label_counts = {'positive': 0, 'neutral': 0, 'negative': 0} |
|
|
|
for sentiment in flat_sentiments_list: |
|
label_counts[sentiment['label']] += 1 |
|
|
|
overall_sentiment = max(label_counts, key=label_counts.get) |
|
|
|
return overall_sentiment |
|
|
|
def get_sentiment_from_yt(query): |
|
driver.get(f"https://www.youtube.com/results?search_query={query}") |
|
wait = WebDriverWait(driver, 10) |
|
user_data = driver.find_elements(By.XPATH,'//*[@id="video-title"]') |
|
links = [] |
|
for i in user_data: |
|
attr = i.get_attribute("href") |
|
if attr is not None and "youtube.com/shorts" not in attr: |
|
video_id_start = attr.find("?v=") |
|
if video_id_start != -1: |
|
video_id_end = attr.find("&", video_id_start) |
|
if video_id_end == -1: |
|
video_id_end = None |
|
video_id = attr[video_id_start + 3: video_id_end] |
|
links.append(video_id) |
|
for i in range(len(links)): |
|
urllib.request.urlretrieve(f"https://i.ytimg.com/vi/{links[i]}/maxresdefault.jpg", f"{query}/{links[i]}.jpg") |
|
|
|
pipe_Image2Text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device = 0) |
|
pipe_GrammarCorrection = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis", device = 0) |
|
pipe_SentimentAnalysis = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", return_all_scores=True, device = 0) |
|
|
|
descriptions = [] |
|
for i in range(len(links)): |
|
filepath = f"{query}/{links[i]}.jpg" |
|
if os.path.exists(filepath): |
|
descriptions.append(pipe_Image2Text(filepath)[0]['generated_text']) |
|
|
|
sentiments = [] |
|
for i in range(len(descriptions)): |
|
res = pipe_GrammarCorrection(descriptions[i]) |
|
descriptions[i] = res[0]['generated_text'] |
|
sentiments.append(pipe_SentimentAnalysis(descriptions[i])) |
|
|
|
overall_sentiment = determine_overall_sentiment(sentiments) |
|
|
|
return links, descriptions, sentiments, overall_sentiment |
|
|
|
st.title("YouTube Sentiment Analysis") |
|
|
|
query = st.text_input("Enter YouTube search query:") |
|
if st.button("Get Sentiments"): |
|
links, descriptions, sentiments, overall_sentiment = get_sentiment_from_yt(query) |
|
st.write(f"Overall Sentiment: {overall_sentiment}") |
|
st.write("Thumbnails, Descriptions, and Sentiments:") |
|
for i in range(len(links)): |
|
st.image(f"{query}/{links[i]}.jpg", caption=f"Video {i+1}") |
|
st.write(f"**Description {i+1}:** {descriptions[i]}") |
|
st.write(f"**Sentiments {i+1}:** {sentiments[i]}") |
|
st.write("---") |
|
|