# app.py -- last updated by MihirRajeshPanchal ("Update app.py", commit 5a657bd, verified)
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import urllib.request
from transformers import pipeline
import streamlit as st
import os
import subprocess
import chromedriver_autoinstaller
# Module-level Selenium setup: one headless Chrome session shared by the
# scraping code below through the global ``driver``.
#
# NOTE(review): Streamlit presumably re-executes this script on every user
# interaction, which would start a new browser each time and never quit the
# previous one -- confirm, and consider caching the driver as a resource.

# Presumably fetches a chromedriver binary matching the installed Chrome and
# makes it discoverable -- verify against the chromedriver_autoinstaller docs.
chromedriver_autoinstaller.install()

chrome_options = webdriver.ChromeOptions()
for _chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
):
    chrome_options.add_argument(_chrome_flag)

driver = webdriver.Chrome(options=chrome_options)
def determine_overall_sentiment(sentiments_list):
    """Return the label that wins the most per-result votes.

    Args:
        sentiments_list: One entry per analysed text.  Each entry is the raw
            output of the sentiment pipeline, i.e. a list of score lists,
            where a score list holds one ``{'label': ..., 'score': ...}``
            dict per candidate label.

    Returns:
        The label (``'positive'``, ``'neutral'`` or ``'negative'``) that was
        the top-scoring label for the most score lists.  Ties are resolved in
        the order positive, neutral, negative.  ``'neutral'`` is returned when
        there is nothing to count.
    """
    label_counts = {'positive': 0, 'neutral': 0, 'negative': 0}

    for sublist in sentiments_list:
        for sentiment_list in sublist:
            if not sentiment_list:
                continue
            # Every score list contains *all* labels, so counting each dict
            # would give every label the same total.  Only the highest-scoring
            # label of each score list gets a vote.
            top = max(sentiment_list, key=lambda entry: entry['score'])
            label = top['label']
            label_counts[label] = label_counts.get(label, 0) + 1

    if not any(label_counts.values()):
        # No votes at all: do not pretend the result is positive.
        return 'neutral'

    return max(label_counts, key=label_counts.get)
def _extract_video_id(href):
    """Return the ``v`` query parameter of a YouTube watch URL, or ``None``.

    Shorts links and links without a ``v`` parameter yield ``None``.
    """
    from urllib.parse import parse_qs, urlparse

    if not href or "youtube.com/shorts" in href:
        return None
    values = parse_qs(urlparse(href).query).get("v")
    return values[0] if values else None


def get_sentiment_from_yt(query):
    """Caption the thumbnails of YouTube search results and rate their sentiment.

    The function loads the YouTube results page for *query* in the module-level
    ``driver``, downloads each result's ``maxresdefault.jpg`` thumbnail to
    ``{query}/{video_id}.jpg``, captions it, grammar-corrects the caption and
    runs sentiment classification on the corrected text.

    Args:
        query: Free-text search string.  It is also used as the name of the
            directory the thumbnails are written to.

    Returns:
        A tuple ``(links, descriptions, sentiments, overall_sentiment)``.
        ``links`` holds the ids of the videos whose thumbnail was saved;
        ``descriptions`` and ``sentiments`` are index-aligned with it.
        ``overall_sentiment`` is the result of ``determine_overall_sentiment``.
        A blank query, or a search without usable thumbnails, yields three
        empty lists.
    """
    from urllib.parse import quote_plus

    from selenium.common.exceptions import TimeoutException

    if not query or not query.strip():
        # An empty directory name would make the thumbnail path start at "/".
        return [], [], [], determine_overall_sentiment([])

    # Encode the query so spaces, '&', '#', ... cannot corrupt the URL.
    driver.get(f"https://www.youtube.com/results?search_query={quote_plus(query)}")

    # The result list is rendered by JavaScript; poll until at least one title
    # element exists instead of reading the DOM immediately.
    wait = WebDriverWait(driver, 10)
    title_xpath = '//*[@id="video-title"]'
    try:
        user_data = wait.until(lambda drv: drv.find_elements(By.XPATH, title_xpath))
    except TimeoutException:
        user_data = []

    candidates = []
    for element in user_data:
        video_id = _extract_video_id(element.get_attribute("href"))
        if video_id is not None:
            candidates.append(video_id)

    # NOTE(review): ``query`` is user input used verbatim as a directory name
    # (the caller reads the images back from the same path); consider mapping
    # it to a sanitised directory in both places.
    if candidates:
        os.makedirs(query, exist_ok=True)

    links = []
    for video_id in candidates:
        filepath = f"{query}/{video_id}.jpg"
        try:
            urllib.request.urlretrieve(f"https://i.ytimg.com/vi/{video_id}/maxresdefault.jpg", filepath)
        except OSError:
            # HTTPError/URLError derive from OSError.  A thumbnail that cannot
            # be fetched or written is skipped rather than aborting the run.
            continue
        # Keep only videos with a thumbnail on disk so that links,
        # descriptions and sentiments stay index-aligned for the caller.
        if os.path.exists(filepath):
            links.append(video_id)

    if not links:
        # Nothing to analyse: skip loading the three models.
        return [], [], [], determine_overall_sentiment([])

    pipe_Image2Text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device = 0)
    pipe_GrammarCorrection = pipeline("text2text-generation", model="pszemraj/flan-t5-large-grammar-synthesis", device = 0)
    pipe_SentimentAnalysis = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", return_all_scores=True, device = 0)

    descriptions = []
    sentiments = []
    for video_id in links:
        caption = pipe_Image2Text(f"{query}/{video_id}.jpg")[0]['generated_text']
        corrected = pipe_GrammarCorrection(caption)[0]['generated_text']
        descriptions.append(corrected)
        sentiments.append(pipe_SentimentAnalysis(corrected))

    overall_sentiment = determine_overall_sentiment(sentiments)
    return links, descriptions, sentiments, overall_sentiment
# Streamlit front end: read a search query, run the analysis on button press
# and render one section per analysed thumbnail.
st.title("YouTube Sentiment Analysis")
query = st.text_input("Enter YouTube search query:")
if st.button("Get Sentiments"):
    if not query.strip():
        # Avoid launching a scrape (and building paths from an empty
        # directory name) when nothing was typed.
        st.warning("Please enter a search query.")
    else:
        links, descriptions, sentiments, overall_sentiment = get_sentiment_from_yt(query)
        st.write(f"Overall Sentiment: {overall_sentiment}")
        st.write("Thumbnails, Descriptions, and Sentiments:")

        # get_sentiment_from_yt only describes thumbnails that exist on disk,
        # so descriptions/sentiments line up with the existing files in link
        # order, not with every link.  Indexing all three lists by the link
        # position would misalign them or raise IndexError.
        thumbnail_paths = [f"{query}/{video_id}.jpg" for video_id in links]
        thumbnail_paths = [path for path in thumbnail_paths if os.path.exists(path)]

        rows = zip(thumbnail_paths, descriptions, sentiments)
        for number, (path, description, sentiment) in enumerate(rows, start=1):
            st.image(path, caption=f"Video {number}")
            st.write(f"**Description {number}:** {description}")
            st.write(f"**Sentiments {number}:** {sentiment}")
            st.write("---")