aus10powell committed on
Commit
b20b18b
·
1 Parent(s): 8897995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -40
app.py CHANGED
@@ -8,7 +8,8 @@ import json
8
  import logging
9
  import sys
10
  import spacy
11
- #sys.setrecursionlimit(20000)
 
12
  import pandas as pd
13
  import numpy as np
14
  import os
@@ -22,17 +23,21 @@ from fastapi.staticfiles import StaticFiles
22
  from fastapi.templating import Jinja2Templates
23
 
24
  from rouge_score import rouge_scorer
 
25
  import scripts.sentiment as sentiment
26
  import scripts.twitter_scraper as ts
27
  from scripts import sentiment
28
  from scripts.summarization import bert_summarization
29
  from scripts.twitter_scraper import get_latest_account_tweets
30
- from scripts import twitter_scraper as ts
 
31
  import scripts.utils as utils
 
32
  from scripts import generative
33
  import nltk
34
 
35
  logging.basicConfig(level=logging.INFO)
 
36
 
37
  app = FastAPI()
38
  templates = Jinja2Templates(directory="templates")
@@ -82,35 +87,34 @@ async def get_accounts() -> List[dict]:
82
 
83
  @app.get("/tweets/{username}")
84
  def get_tweets_username(username: str) -> dict:
85
- # if username in username_list:
86
- # query = f"from:{username} since:{start_date} until:{end_date}"
87
- # return ts.get_tweets(query=query)
88
- # else:
89
- # return {"detail": "Account not in scope of project."}
90
-
91
- # Method 1: Using Tweepy method
92
- # df_tweets = get_latest_account_tweets(username)
93
-
94
  # Method 2: Use Snscrape
95
  df_tweets = ts.get_tweets(handle=username)
96
 
97
  if isinstance(df_tweets, pd.DataFrame):
98
- print(df_tweets.head(2))
99
- print(df_tweets.shape)
100
  df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
101
- df_tweets["created_at"] = df_tweets["created_at"].dt.strftime("%Y-%m-%d %H:%M:%S")
102
- df_tweets = df_tweets.sort_values("created_at", ascending=False)#.tail(10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
104
- df_tweets.to_html(open('df_tweets_html.html', 'w'))
105
  df_tweets_data = df_tweets.to_dict(orient="records")
106
-
107
- response_data = {
108
- "html": df_tweets_html,
109
- "data": df_tweets_data
110
- }
111
 
112
  return JSONResponse(content=response_data, status_code=200)
113
- # return HTMLResponse(content=df_tweets_html, status_code=200)
114
  else:
115
  print("Error: Failed to retrieve tweets.")
116
  return df_tweets
@@ -214,6 +218,7 @@ async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
214
  }
215
 
216
 
 
217
  @app.post("/api/generate")
218
  async def generate_text(request: Request):
219
  """Generate text from a prompt.
@@ -269,10 +274,9 @@ async def generate_summary(request: Request):
269
 
270
  print("*" * 50)
271
  data = await request.json()
272
- print('data',data['tweetsData'])
273
  # Get the list of text
274
- tweets = [t['full_text'] for t in data["tweetsData"]]
275
-
276
 
277
  # Concatenate tweets into a single string
278
  text = " .".join(tweets)
@@ -281,35 +285,25 @@ async def generate_summary(request: Request):
281
  nlp.add_pipe("sentencizer")
282
 
283
  sentences = nlp(text).sents
284
- # sentences = Text8Corpus(text)
285
- # phrases = Phrases(
286
- # sentences, min_count=1, threshold=1, connector_words=ENGLISH_CONNECTOR_WORDS
287
- # )
288
- # first_sentence = next(iter(sentences))
289
- # first_sentence
290
  sentences = list(sentences)
291
- # # Shuffle the list
292
- # random.shuffle(sentences)
293
- # Option 1
294
- # sampled_tweets = random.sample(tweets, int(0.1 * len(tweets)))
295
 
296
  # Option 2
297
  sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
298
-
299
  sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
300
 
301
  # Join the strings into one text blob
302
  tweet_blob = " ".join(sampled_sentences)
303
 
304
  # Generate the summary
305
- summary = bert_summarization(
306
- tweet_blob
307
- )
308
- print("Summary:",summary)
309
  # Return the summary
310
  return {"tweets_summary": summary}
311
 
312
 
 
313
  @app.get("/examples1")
314
  async def read_examples():
315
  with open("templates/charts/handle_sentiment_breakdown.html") as f:
@@ -322,3 +316,9 @@ async def read_examples():
322
  with open("templates/charts/handle_sentiment_timesteps.html") as f:
323
  html = f.read()
324
  return HTMLResponse(content=html)
 
 
 
 
 
 
 
8
  import logging
9
  import sys
10
  import spacy
11
+
12
+ # sys.setrecursionlimit(20000)
13
  import pandas as pd
14
  import numpy as np
15
  import os
 
23
  from fastapi.templating import Jinja2Templates
24
 
25
  from rouge_score import rouge_scorer
26
+ # Scripts
27
  import scripts.sentiment as sentiment
28
  import scripts.twitter_scraper as ts
29
  from scripts import sentiment
30
  from scripts.summarization import bert_summarization
31
  from scripts.twitter_scraper import get_latest_account_tweets
32
+ from scripts.sentiment import twitter_sentiment_api_score
33
+ from scripts import twitter_scraper as ts
34
  import scripts.utils as utils
35
+ from scripts import translation
36
  from scripts import generative
37
  import nltk
38
 
39
  logging.basicConfig(level=logging.INFO)
40
+ pd.set_option('display.max_colwidth', 20)
41
 
42
  app = FastAPI()
43
  templates = Jinja2Templates(directory="templates")
 
87
 
88
  @app.get("/tweets/{username}")
89
  def get_tweets_username(username: str) -> dict:
 
 
 
 
 
 
 
 
 
90
  # Method 2: Use Snscrape
91
  df_tweets = ts.get_tweets(handle=username)
92
 
93
  if isinstance(df_tweets, pd.DataFrame):
 
 
94
  df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
95
+ df_tweets["created_at"] = df_tweets["created_at"].dt.strftime(
96
+ "%Y-%m-%d %H:%M:%S"
97
+ )
98
+ df_tweets = df_tweets.sort_values("created_at", ascending=False)
99
+
100
+ # Additional processing
101
+ logging.info("Running sentiment on tweets")
102
+ sentiments = twitter_sentiment_api_score(
103
+ df_tweets['full_text'].to_list(), use_api=False
104
+ )
105
+ df_tweets["sentiment"] = [s['argmax'] for s in sentiments]
106
+ if username == "alikarimi_ak8":
107
+ p = translation.PersianTextProcessor()
108
+ df_tweets['full_text_translated'] = df_tweets["full_text"].apply(lambda c: p.translate_text(persian_text = c))
109
+
110
+
111
  df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
112
+ df_tweets.to_html(open("df_tweets_html.html", "w"))
113
  df_tweets_data = df_tweets.to_dict(orient="records")
114
+
115
+ response_data = {"html": df_tweets_html, "data": df_tweets_data}
 
 
 
116
 
117
  return JSONResponse(content=response_data, status_code=200)
 
118
  else:
119
  print("Error: Failed to retrieve tweets.")
120
  return df_tweets
 
218
  }
219
 
220
 
221
+ ## APIs: Primarily called by the index page
222
  @app.post("/api/generate")
223
  async def generate_text(request: Request):
224
  """Generate text from a prompt.
 
274
 
275
  print("*" * 50)
276
  data = await request.json()
277
+ print("data", data["tweetsData"])
278
  # Get the list of text
279
+ tweets = [t["full_text"] for t in data["tweetsData"]]
 
280
 
281
  # Concatenate tweets into a single string
282
  text = " .".join(tweets)
 
285
  nlp.add_pipe("sentencizer")
286
 
287
  sentences = nlp(text).sents
288
+
 
 
 
 
 
289
  sentences = list(sentences)
 
 
 
 
290
 
291
  # Option 2
292
  sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
293
+
294
  sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
295
 
296
  # Join the strings into one text blob
297
  tweet_blob = " ".join(sampled_sentences)
298
 
299
  # Generate the summary
300
+ summary = bert_summarization(tweet_blob)
301
+ print("Summary:", summary)
 
 
302
  # Return the summary
303
  return {"tweets_summary": summary}
304
 
305
 
306
+ ## Historical Tweets pages
307
  @app.get("/examples1")
308
  async def read_examples():
309
  with open("templates/charts/handle_sentiment_breakdown.html") as f:
 
316
  with open("templates/charts/handle_sentiment_timesteps.html") as f:
317
  html = f.read()
318
  return HTMLResponse(content=html)
319
+
320
+
321
+ # uvicorn --workers=2 app:app
322
+ if __name__ == "__main__":
323
+ # uvicorn.run(app, host="0.0.0.0", port=8000)
324
+ uvicorn.run("app:app", host="127.0.0.1", port=5050, reload=True)