Youtube_data / app.py
tushifire's picture
Good to go
52855f5
import streamlit as st
import os
import googleapiclient.discovery
import pandas as pd
import re
# Show full comment text instead of truncating long cells. ``None`` is the
# supported "no limit" value; the legacy ``-1`` sentinel was deprecated in
# pandas 1.0 and is rejected by newer releases.
pd.set_option("display.max_colwidth", None)
def extract_all_comments(video_id, youtube, page_token='', comments_list=None, *, max_comments=500):
    """Collect top-level comment data for a video, one page at a time.

    Each page is requested through ``youtube.commentThreads().list(...)`` with
    ``maxResults=100``; one dict per returned item is appended to
    ``comments_list``.

    Args:
        video_id: Value passed as ``videoId`` to the API request.
        youtube: Client object exposing ``commentThreads().list(...).execute()``.
        page_token: Token of the first page to request ('' for the first page).
        comments_list: Optional list to append to. A new list is created when
            omitted, so separate calls never share results.
        max_comments: Paging stops once at least this many comments have been
            collected. The check runs after each page, so the result may
            exceed the limit by up to one page.

    Returns:
        The list of comment dicts with keys ``text_dsiplay``, ``text_original``,
        ``likes``, ``published_at``, ``updated_at`` and ``reply_count``.
    """
    # A default of ``[]`` would be created once and shared by every call.
    if comments_list is None:
        comments_list = []

    while True:
        request = youtube.commentThreads().list(
            part=['id', 'snippet'],
            maxResults=100,
            videoId=video_id,
            pageToken=page_token,
        )
        response = request.execute()

        for comment_details in response.get('items', []):
            thread_snippet = comment_details.get('snippet') or {}
            top_level = thread_snippet.get('topLevelComment') or {}
            comment = top_level.get('snippet') or {}
            comments_list.append({
                # NOTE: the misspelled key is kept on purpose; it is part of
                # the returned data shape (it becomes a DataFrame column).
                'text_dsiplay': comment.get('textDisplay'),
                'text_original': comment.get('textOriginal'),
                'likes': comment.get('likeCount'),
                'published_at': comment.get('publishedAt'),
                'updated_at': comment.get('updatedAt'),
                'reply_count': thread_snippet.get('totalReplyCount'),
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            # No further pages.
            break
        if len(comments_list) >= max_comments:
            print("Limiting results for speed up")
            st.info('Limiting results for speed up')
            break

        print("getting next batch of comments")
        page_token = next_page_token

    return comments_list
def extract_comments_from_video(video_id, youtube_api_key):
    """Fetch a video's comments and return them as a DataFrame.

    Builds an API client with ``googleapiclient.discovery.build`` using the
    module-level ``api_service_name`` / ``api_version`` and the given key, then
    delegates to ``extract_all_comments``.

    Args:
        video_id: Video identifier forwarded to ``extract_all_comments``.
        youtube_api_key: Developer key passed to the client builder.

    Returns:
        A DataFrame with one row per collected comment, or an empty DataFrame
        if building the client or fetching the comments fails.
    """
    try:
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, developerKey=youtube_api_key)
        # Pass a fresh list explicitly so results can never accumulate
        # between calls through a shared default argument.
        found_comments = extract_all_comments(
            video_id=video_id, youtube=youtube, page_token='', comments_list=[])
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate; the cause is reported instead of hidden.
        print(f"An exception occurred: {exc!r}")
        return pd.DataFrame()

    print(len(found_comments))
    return pd.DataFrame(found_comments)
#os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
# Service name and version handed to googleapiclient.discovery.build().
api_service_name, api_version = "youtube", "v3"
def find_video_id(youtube_video_id):
    """Return the video ID contained in a YouTube link or bare ID.

    Recognised link shapes:
      * a ``v=<id>`` query parameter (e.g. ``watch?v=<id>&t=10``)
      * ``youtu.be/<id>`` short links
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths

    Links may omit the ``https://`` scheme. Surrounding whitespace is
    stripped. Anything that is not recognised as a link is returned as-is
    (after stripping), so a bare video ID passes through unchanged.

    >>> find_video_id('https://youtu.be/dQw4w9WgXcQ?si=abc')
    'dQw4w9WgXcQ'
    >>> find_video_id('dQw4w9WgXcQ')
    'dQw4w9WgXcQ'
    """
    from urllib.parse import parse_qs, urlsplit

    text = youtube_video_id.strip()

    # Only strings containing '/' are parsed as links.
    if '/' in text:
        url = text if '://' in text else 'https://' + text
        try:
            parts = urlsplit(url)
        except ValueError:
            # Malformed URL (e.g. broken IPv6 host); use the fallback below.
            parts = None
        if parts is not None:
            query_ids = parse_qs(parts.query).get('v')
            if query_ids:
                return query_ids[0]
            host = (parts.hostname or '').lower()
            segments = [segment for segment in parts.path.split('/') if segment]
            if segments and (host == 'youtu.be' or host.endswith('.youtu.be')):
                return segments[0]
            if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
                return segments[1]

    # Legacy fallback: plain text such as "v=<id>&t=10" without a URL around it.
    if 'v=' in text:
        return text.split('v=')[1].split('&')[0]
    return text
# ---- Streamlit page: collect an API key and a video, then show its comments ----
st.header('Youtube Comments Extractor')
link = 'How to create FREE YouTube API for yourself [link](https://blog.hubspot.com/website/how-to-get-youtube-api-key)'
st.markdown(link, unsafe_allow_html=True)

with st.form("my_form"):
    st.write('Enter Youtube API key( Will not be stored )')
    youtube_api_key = st.text_input('API key', placeholder='For less frequent use,Keep this field empty')
    st.write('Enter Youtube Video ID/ Video Link')
    youtube_video_id = st.text_input('Video')
    submitted = st.form_submit_button("Submit")

    # NOTE(review): the original indentation was lost; the submit handling is
    # kept inside the form, as in the standard Streamlit form example - confirm.
    if submitted:
        youtube_video_id = find_video_id(youtube_video_id)

        # Treat a whitespace-only entry the same as an empty field.
        youtube_api_key = youtube_api_key.strip()
        if not youtube_api_key:
            # Fall back to the key from the "youtube_api" environment variable.
            # ``.get`` avoids crashing the page with KeyError when it is unset.
            youtube_api_key = os.environ.get("youtube_api", "")
            if youtube_api_key:
                st.warning('Using Default API,Please create your own for frequent use')

        if not youtube_api_key:
            st.error('No API key available. Enter your own key or set the "youtube_api" environment variable.')
        else:
            df = extract_comments_from_video(youtube_video_id, youtube_api_key)
            if len(df) > 0:
                st.dataframe(df)
            else:
                st.info('This video comments are not found')