Spaces:
Runtime error
Runtime error
# Sentiment analysis of the comments on a video identified by its YouTube video ID.
import os

import googleapiclient.discovery
import pandas as pd
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
app = FastAPI()

# YouTube Data API client configuration.
api_service_name = "youtube"
api_version = "v3"
# The API key is read from the YOUTUBE_API_KEY environment variable so that no
# credential is committed with the source. Any key that was previously
# hard-coded here has been exposed publicly and should be revoked.
# When the variable is unset this is None; the client is still constructed,
# but the API is expected to reject unauthenticated requests.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY")
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)
class SingleInput(BaseModel):
    """Request payload holding the ID of the video whose comments are analyzed."""

    # Passed as ``videoId`` to the YouTube comment-thread listing.
    video_id: str
# Tokenizer and classifier are loaded once at import time from the same
# pretrained checkpoint, so the name is kept in a single place.
_SENTIMENT_CHECKPOINT = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
def scrape_comments(video_id: str, max_results: int = 100) -> pd.DataFrame:
    """Fetch top-level comments of a YouTube video as a one-column DataFrame.

    Issues a single ``commentThreads.list`` request through the module-level
    ``youtube`` client. No pagination is performed, so at most one page of
    comment threads is returned.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Page size sent to the API as ``maxResults`` (the YouTube
            Data API documents an accepted range of 1 to 100).

    Returns:
        A DataFrame with a single ``comment`` column containing the text of
        each top-level comment; empty when the response carries no items.

    Raises:
        Any error raised by the API client while executing the request
        propagates unchanged.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=max_results,
        # The API defaults to HTML output, which puts tags and character
        # entities into ``textDisplay``; plain text is what the sentiment
        # model should see.
        textFormat="plainText",
    )
    response = request.execute()

    # ``.get`` keeps a response without an ``items`` key from raising KeyError;
    # it is treated the same as a video with no comments.
    texts = [
        item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
        for item in response.get("items", [])
    ]
    return pd.DataFrame(texts, columns=["comment"])
# The sentiment checkpoint predicts a star rating: class index 0..4 stands for
# "1 star".."5 stars". Ratings are bucketed as 1-2 stars -> negative,
# 3 stars -> neutral, 4-5 stars -> positive.
_STAR_INDEX_TO_SENTIMENT = ("negative", "negative", "neutral", "positive", "positive")


# NOTE(review): this function is not registered on ``app`` anywhere in the
# visible code; if it is meant to be an HTTP endpoint it needs a route
# decorator (e.g. ``@app.post(...)``) with the intended path - confirm.
def analyze_sentiment_endpoint(data: SingleInput):
    """Classify the comments of a YouTube video and count them per sentiment.

    Comments are fetched with ``scrape_comments``, tokenized as one padded and
    truncated batch, and scored by the module-level ``model``. The highest
    scoring class of each comment is mapped to positive / negative / neutral.

    Args:
        data: Request payload; only ``data.video_id`` is read.

    Returns:
        A dict with ``sentiment_counts`` (a dict holding the ``positive``,
        ``negative`` and ``neutral`` totals) and ``comments_count`` (the
        number of comments analyzed).

    Raises:
        HTTPException: With status 400 when no comments were found.
    """
    comments_df = scrape_comments(data.video_id)
    if comments_df.empty:
        raise HTTPException(status_code=400, detail="No comments found for the provided video ID")

    encoded = tokenizer(
        comments_df["comment"].tolist(),
        padding=True,
        truncation=True,
        return_tensors="pt",
    )

    # Inference only: gradients are not needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Tally the predicted class of each comment into its sentiment bucket.
    sentiment_counts = {"positive": 0, "negative": 0, "neutral": 0}
    for class_index in torch.argmax(logits, dim=1).tolist():
        sentiment_counts[_STAR_INDEX_TO_SENTIMENT[class_index]] += 1

    return {"sentiment_counts": sentiment_counts, "comments_count": len(comments_df)}