Spaces:
Build error
Build error
import time | |
import pprint | |
import csv | |
import selenium | |
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from webdriver_manager.chrome import ChromeDriverManager | |
from selenium.webdriver.support.wait import WebDriverWait | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.chrome.options import Options | |
import csv | |
from youtube_comment_scraper_python import * | |
import pandas as pd | |
import plotly.express as px | |
import re | |
import streamlit as st | |
st.title('Youtube WebScrap⛏️')

# ------------------------------ CHANNEL DATA ------------------------------
# Location of the Chrome *browser* binary. This is not the chromedriver
# executable, so it belongs in the options, not in the driver service.
chrome_path = '/usr/bin/google-chrome'

chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = chrome_path

# Create the WebDriver instance.
# The chromedriver executable is resolved through webdriver_manager (already
# imported at the top of the file) and handed to Selenium via a Service
# object. The previous code called an un-imported `chromedriver_autoinstaller`
# and passed the browser binary as `executable_path`, which cannot start a
# driver session.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
# Ask for the channel URL in the Streamlit UI and halt this script run until
# the user has supplied one.
url = st.text_input('Paste the Youtube Channel Link',"")
if not url:
    st.warning('Please input a Link.')
    st.stop()
st.success('Thank you for inputting a link.')

# Example input: 'https://www.youtube.com/@YasoobKhalid/videos'
# Use the first capitalised word in the URL as a display name for the channel.
name = re.compile(r"[A-Z]\w+")
inp = name.findall(url)
# findall() returns an empty list when the URL contains no capitalised word
# (e.g. an all-lowercase handle); fall back to the raw URL instead of raising
# IndexError on inp[0].
out = inp[0] if inp else url
st.write('Getting Data from', out, 'channel')

# Load the channel page once, using the URL from the text box. The former
# console `input()` prompt was removed: it replaced the URL entered in the UI
# and waits on stdin, which a Streamlit app does not provide interactively.
driver.get(url)

# Channel header details as rendered on the loaded page.
handle = driver.find_element(By.XPATH, '//yt-formatted-string[@id="channel-handle"]').text
subscriber_count = driver.find_element(By.XPATH, '//yt-formatted-string[@id="subscriber-count"]').text
# Keep scrolling to the bottom of the page until the document height stops
# changing, pausing after every scroll so newly loaded videos can appear.
WAIT_IN_SECONDS = 5
GET_PAGE_HEIGHT_JS = "return document.documentElement.scrollHeight"
SCROLL_TO_JS = "window.scrollTo(0, arguments[0]);"

last_height = driver.execute_script(GET_PAGE_HEIGHT_JS)
page_grew = True
while page_grew:
    # Jump to the bottom as measured on the previous pass.
    driver.execute_script(SCROLL_TO_JS, last_height)
    # Wait for new videos to show up.
    time.sleep(WAIT_IN_SECONDS)
    # Re-measure; an unchanged height ends the loop.
    new_height = driver.execute_script(GET_PAGE_HEIGHT_JS)
    page_grew = new_height != last_height
    last_height = new_height
# Collect the per-video elements from the fully scrolled page.
thumbnails = driver.find_elements(By.XPATH, '//a[@id="thumbnail"]/yt-image/img')
views = driver.find_elements(By.XPATH,'//div[@id="metadata-line"]/span[1]')
titles = driver.find_elements(By.ID, "video-title")
links = driver.find_elements(By.ID, "video-title-link")

# Build one record per video by walking the four element lists in lockstep;
# zip() stops at the shortest list. Likes are not collected.
videos = [
    {
        'title': title_el.text,
        'views': views_el.text,
        'thumbnail': thumb_el.get_attribute('src'),
        'link': link_el.get_attribute('href'),
    }
    for title_el, views_el, thumb_el, link_el in zip(titles, views, thumbnails, links)
]
print(videos)
# Write the scraped records to a CSV file, then read the file back and show
# it as a table in the Streamlit page.

# With nothing scraped, `to_csv[0]` below would raise IndexError; report the
# situation in the UI and end this script run instead.
if not videos:
    st.warning('No videos were found on this page.')
    st.stop()

to_csv = videos
# Column names are taken from the first record.
keys = to_csv[0].keys()

# NOTE(review): this is an absolute Windows path, while `chrome_path` earlier
# in the file is a Linux path — confirm the intended output location.
output_path = r'C:/Users/ashok/OneDrive/Desktop/WebScrap/Youtube/output/people.csv'

with open(output_path, 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(to_csv)

# Read back only after the `with` block has closed (and flushed) the file.
df = pd.read_csv(output_path)
st.dataframe(df)