Spaces:
Build error
Build error
| import os | |
| import pandas as pd | |
| from apify_client import ApifyClient | |
| # Constants | |
# Ordered column names of the flattened tweet table. The names mirror the keys
# of the dictionary produced by ``flatten_response``.
TWEETS_COLUMNS_LIST = [
    # Tweet identity
    "url",
    "createdAt",
    "id",
    # Tweet kind flags
    "isReply",
    "inReplyToId",
    "isRetweet",
    "isQuote",
    # Engagement counters
    "viewCount",
    "retweetCount",
    "likeCount",
    "replyCount",
    "lang",
    # Author fields, taken from the nested "author" object
    "author__createdAt",
    "author__location",
    "author__name",
    "author__id",
    "author__description",
    "author__followers",
    "author__verified",
    # Tweet body
    "text",
]
# Author-identifying columns that are left out of the comments dataframe.
REMOVE_COLUMNS_COMMENTS = ["author__name", "author__id", "author__description"]
# Columns holding counters.
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by callers for integer casting -- confirm before changing.
INT_COLUMNS = [
    # Per-tweet counters
    "viewCount",
    "retweetCount",
    "likeCount",
    "replyCount",
    # Per-author counter
    "author__followers",
]
# Apify settings come from the environment; each is None when the variable is unset.
APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID")
APIFY_TOKEN = os.environ.get("APIFY_TOKEN")

# Module-wide Apify client shared by the fetch functions below.
client = ApifyClient(APIFY_TOKEN)
def flatten_response(response):
    """Return a flat dictionary built from one nested tweet item.

    Args:
        response: Mapping describing a single tweet. Top-level fields are read
            directly; author fields are read from the nested ``"author"``
            mapping.

    Returns:
        dict: One entry per tweet column. Author fields are prefixed with
        ``author__``. Any field absent from ``response`` maps to ``None``.
        The two ``createdAt`` values are passed through ``pd.to_datetime``.
    """
    # An item may lack the "author" object (or carry it as null). Fall back to
    # an empty mapping so the author columns become None instead of raising,
    # matching how every top-level field is already handled.
    author = response.get("author") or {}

    return {
        "url": response.get("url"),
        "createdAt": pd.to_datetime(response.get("createdAt")),
        "id": response.get("id"),
        "isReply": response.get("isReply"),
        # None when the tweet is not a reply and the key is absent.
        "inReplyToId": response.get("inReplyToId"),
        "isRetweet": response.get("isRetweet"),
        "isQuote": response.get("isQuote"),
        "viewCount": response.get("viewCount"),
        "retweetCount": response.get("retweetCount"),
        "likeCount": response.get("likeCount"),
        "replyCount": response.get("replyCount"),
        "lang": response.get("lang"),
        "author__createdAt": pd.to_datetime(author.get("createdAt")),
        "author__location": author.get("location"),
        "author__name": author.get("name"),
        "author__id": author.get("id"),
        "author__description": author.get("description"),
        "author__followers": author.get("followers"),
        # The source field is "isVerified"; the flat column drops the "is".
        "author__verified": author.get("isVerified"),
        "text": response.get("text"),
    }
def fetch_main_tweet_dataframe(url):
    """Fetch a single tweet through the Apify actor and return it as a dataframe.

    Args:
        url: URL of the tweet; must contain ``x.com`` or ``twitter.com``.

    Returns:
        pandas.DataFrame: One row with the ``TWEETS_COLUMNS_LIST`` columns on
        success.
        dict: ``{"error": <message>}`` when ``url`` does not look like a tweet
        URL or when the actor run produced no items.
    """
    # Input validation. NOTE: this is a substring check, so any URL that merely
    # contains "x.com" (for example a host ending in "x.com") also passes.
    if "x.com" not in url and "twitter.com" not in url:
        return {"error": "Input is not a tweet URL"}

    run = client.actor(APIFY_ACTOR_ID).call(run_input={"startUrls": [url]})

    # Only the first item is needed, so take it lazily rather than building the
    # whole list; an empty dataset yields None instead of an IndexError.
    items = client.dataset(run["defaultDatasetId"]).iterate_items()
    first_item = next(iter(items), None)
    if first_item is None:
        return {"error": "No tweet data was returned for this URL"}

    # Wrap the single flattened record in a list to get a one-row dataframe.
    return pd.DataFrame([flatten_response(first_item)], columns=TWEETS_COLUMNS_LIST)
def _extract_tweet_id(url):
    """Return the tweet ID found in a tweet URL."""
    # Drop the query string, fragment and trailing slash so shared links such
    # as ".../status/123?s=20" or ".../status/123/" still give "123".
    path = url.split("?", 1)[0].split("#", 1)[0].rstrip("/")
    segments = path.split("/")
    # Prefer the segment right after "status" so suffixes like "/photo/1" are
    # ignored. Scan from the right so an account literally named "status" is
    # not mistaken for the marker.
    for index in range(len(segments) - 2, -1, -1):
        if segments[index] == "status":
            return segments[index + 1]
    # No "status" marker: keep the original behaviour and use the last segment.
    return segments[-1]


def fetch_comments_dataframe(url, *, tweet_language="es", max_items=50):
    """Fetch the replies to a tweet through the Apify actor as a dataframe.

    Args:
        url: URL of the tweet; must contain ``x.com`` or ``twitter.com``.
        tweet_language: Value sent as the actor's ``tweetLanguage`` input.
            Defaults to ``"es"``.
        max_items: Value sent as the actor's ``maxItems`` input. Defaults to 50.

    Returns:
        pandas.DataFrame: One row per returned item, with the columns of
        ``TWEETS_COLUMNS_LIST`` minus those in ``REMOVE_COLUMNS_COMMENTS``.
        dict: ``{"error": <message>}`` when ``url`` does not look like a tweet
        URL.
    """
    # Input validation. NOTE: this is a substring check, so any URL that merely
    # contains "x.com" (for example a host ending in "x.com") also passes.
    if "x.com" not in url and "twitter.com" not in url:
        return {"error": "Input is not a tweet URL"}

    run_input_comment = {
        # The tweet ID is passed to the actor as the conversation to collect.
        "conversationIds": [_extract_tweet_id(url)],
        "tweetLanguage": tweet_language,
        "maxItems": max_items,
    }
    run_comment = client.actor(APIFY_ACTOR_ID).call(run_input=run_input_comment)

    dataset_items = client.dataset(run_comment["defaultDatasetId"]).iterate_items()
    flattened_responses = [flatten_response(item) for item in dataset_items]

    include_columns = [
        column
        for column in TWEETS_COLUMNS_LIST
        if column not in REMOVE_COLUMNS_COMMENTS
    ]
    # An empty result still yields a dataframe with the expected columns.
    return pd.DataFrame(flattened_responses, columns=include_columns)