aus10powell committed on
Commit
62a5163
1 Parent(s): 70fef7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -26
app.py CHANGED
@@ -7,6 +7,7 @@ import datetime as dt
7
  import json
8
  import logging
9
  import sys
 
10
  #sys.setrecursionlimit(20000)
11
  import pandas as pd
12
  import numpy as np
@@ -16,18 +17,21 @@ from typing import Dict, List
16
 
17
  import uvicorn
18
  from fastapi import FastAPI, HTTPException, Request, Response
19
- from fastapi.responses import HTMLResponse
20
  from fastapi.staticfiles import StaticFiles
21
  from fastapi.templating import Jinja2Templates
22
 
 
23
  import scripts.sentiment as sentiment
24
  import scripts.twitter_scraper as ts
 
25
  from scripts.summarization import bert_summarization
26
  from scripts.twitter_scraper import get_latest_account_tweets
27
  from scripts import twitter_scraper as ts
28
  import scripts.utils as utils
29
  from scripts import generative
30
  import nltk
 
31
  logging.basicConfig(level=logging.INFO)
32
 
33
  app = FastAPI()
@@ -57,7 +61,7 @@ async def webpage(request: Request):
57
 
58
 
59
  @app.get("/accounts")
60
- def get_accounts() -> List[dict]:
61
  import pandas as pd
62
 
63
  logging.info(f"Pulling account information on {username_list}")
@@ -94,17 +98,26 @@ def get_tweets_username(username: str) -> dict:
94
  print(df_tweets.head(2))
95
  print(df_tweets.shape)
96
  df_tweets = df_tweets[["handle", "created_at", "full_text"]]
97
- df_tweets = df_tweets.sort_values("created_at", ascending=True).tail(10)
98
- df_tweets_html = df_tweets.to_html(classes="center", index=False)
 
 
 
 
 
 
 
 
99
 
100
- return HTMLResponse(content=df_tweets_html, status_code=200)
 
101
  else:
102
  print("Error: Failed to retrieve tweets.")
103
  return df_tweets
104
 
105
 
106
  @app.get("/audience/{username}", response_model=dict)
107
- def get_audience(username: str) -> dict:
108
  if username in username_list:
109
  query = f"from:{username} since:{start_date} until:{end_date}"
110
  tweets = ts.get_tweets(query=query)
@@ -203,6 +216,14 @@ async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
203
 
204
  @app.post("/api/generate")
205
  async def generate_text(request: Request):
 
 
 
 
 
 
 
 
206
  print("*" * 50)
207
  data = await request.json()
208
  print("*" * 50)
@@ -223,18 +244,18 @@ async def generate_text(request: Request):
223
  ###################################################
224
  ## Clean up generate text
225
  # Get rid of final sentence
226
- # sentences = nltk.sent_tokenize(generated_text)
227
- # unique_sentences = set()
228
- # non_duplicate_sentences = []
229
- # for sentence in sentences:
230
- # if sentence not in unique_sentences:
231
- # non_duplicate_sentences.append(sentence)
232
- # unique_sentences.add(sentence)
233
- # final_text = " ".join(non_duplicate_sentences[:-1])
234
-
235
- final_text= generated_text
236
  return {"generated_text": final_text}
237
 
 
238
  @app.post("/api/generate_summary")
239
  async def generate_summary(request: Request):
240
  """Generate summary from tweets
@@ -248,16 +269,45 @@ async def generate_summary(request: Request):
248
 
249
  print("*" * 50)
250
  data = await request.json()
251
-
252
  # Get the list of text
253
- texts = data["text"]
254
 
255
 
256
- # Generate the summary
257
- summary = "This is a placeholder for summary model being returned"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  # Return the summary
260
- return {"summary": summary}
261
 
262
 
263
  @app.get("/examples1")
@@ -272,8 +322,3 @@ async def read_examples():
272
  with open("templates/charts/handle_sentiment_timesteps.html") as f:
273
  html = f.read()
274
  return HTMLResponse(content=html)
275
-
276
- # uvicorn --workers=2 app:app
277
- # if __name__ == "__main__":
278
- # # uvicorn.run(app, host="0.0.0.0", port=8000)
279
- # uvicorn.run("app:app", host="127.0.0.1", port=5049, reload=True)
 
7
  import json
8
  import logging
9
  import sys
10
+ import spacy
11
  #sys.setrecursionlimit(20000)
12
  import pandas as pd
13
  import numpy as np
 
17
 
18
  import uvicorn
19
  from fastapi import FastAPI, HTTPException, Request, Response
20
+ from fastapi.responses import HTMLResponse, JSONResponse
21
  from fastapi.staticfiles import StaticFiles
22
  from fastapi.templating import Jinja2Templates
23
 
24
+ from rouge_score import rouge_scorer
25
  import scripts.sentiment as sentiment
26
  import scripts.twitter_scraper as ts
27
+ from scripts import sentiment
28
  from scripts.summarization import bert_summarization
29
  from scripts.twitter_scraper import get_latest_account_tweets
30
  from scripts import twitter_scraper as ts
31
  import scripts.utils as utils
32
  from scripts import generative
33
  import nltk
34
+
35
  logging.basicConfig(level=logging.INFO)
36
 
37
  app = FastAPI()
 
61
 
62
 
63
  @app.get("/accounts")
64
+ async def get_accounts() -> List[dict]:
65
  import pandas as pd
66
 
67
  logging.info(f"Pulling account information on {username_list}")
 
98
  print(df_tweets.head(2))
99
  print(df_tweets.shape)
100
  df_tweets = df_tweets[["handle", "created_at", "full_text"]]
101
+ df_tweets["created_at"] = df_tweets["created_at"].dt.strftime("%Y-%m-%d %H:%M:%S")
102
+ df_tweets = df_tweets.sort_values("created_at", ascending=False)#.tail(10)
103
+ df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
104
+ df_tweets.to_html(open('df_tweets_html.html', 'w'))
105
+ df_tweets_data = df_tweets.to_dict(orient="records")
106
+
107
+ response_data = {
108
+ "html": df_tweets_html,
109
+ "data": df_tweets_data
110
+ }
111
 
112
+ return JSONResponse(content=response_data, status_code=200)
113
+ # return HTMLResponse(content=df_tweets_html, status_code=200)
114
  else:
115
  print("Error: Failed to retrieve tweets.")
116
  return df_tweets
117
 
118
 
119
  @app.get("/audience/{username}", response_model=dict)
120
+ async def get_audience(username: str) -> dict:
121
  if username in username_list:
122
  query = f"from:{username} since:{start_date} until:{end_date}"
123
  tweets = ts.get_tweets(query=query)
 
216
 
217
  @app.post("/api/generate")
218
  async def generate_text(request: Request):
219
+ """Generate text from a prompt.
220
+
221
+ Args:
222
+ request: The HTTP request.
223
+
224
+ Returns:
225
+ The generated text.
226
+ """
227
  print("*" * 50)
228
  data = await request.json()
229
  print("*" * 50)
 
244
  ###################################################
245
  ## Clean up generate text
246
  # Get rid of final sentence
247
+ sentences = nltk.sent_tokenize(generated_text)
248
+ unique_sentences = set()
249
+ non_duplicate_sentences = []
250
+ for sentence in sentences:
251
+ if sentence not in unique_sentences:
252
+ non_duplicate_sentences.append(sentence)
253
+ unique_sentences.add(sentence)
254
+ final_text = " ".join(non_duplicate_sentences[:-1])
255
+
 
256
  return {"generated_text": final_text}
257
 
258
+
259
  @app.post("/api/generate_summary")
260
  async def generate_summary(request: Request):
261
  """Generate summary from tweets
 
269
 
270
  print("*" * 50)
271
  data = await request.json()
272
+ print('data',data['tweetsData'])
273
  # Get the list of text
274
+ tweets = [t['full_text'] for t in data["tweetsData"]]
275
 
276
 
277
+ # Concatenate tweets into a single string
278
+ text = " .".join(tweets)
279
+
280
+ nlp = spacy.load("en_core_web_sm")
281
+ nlp.add_pipe("sentencizer")
282
+
283
+ sentences = nlp(text).sents
284
+ # sentences = Text8Corpus(text)
285
+ # phrases = Phrases(
286
+ # sentences, min_count=1, threshold=1, connector_words=ENGLISH_CONNECTOR_WORDS
287
+ # )
288
+ # first_sentence = next(iter(sentences))
289
+ # first_sentence
290
+ sentences = list(sentences)
291
+ # # Shuffle the list
292
+ # random.shuffle(sentences)
293
+ # Option 1
294
+ # sampled_tweets = random.sample(tweets, int(0.1 * len(tweets)))
295
 
296
+ # Option 2
297
+ sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
298
+
299
+ sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
300
+
301
+ # Join the strings into one text blob
302
+ tweet_blob = " ".join(sampled_sentences)
303
+
304
+ # Generate the summary
305
+ summary = bert_summarization(
306
+ tweet_blob
307
+ )
308
+ print("Summary:",summary)
309
  # Return the summary
310
+ return {"tweets_summary": summary}
311
 
312
 
313
  @app.get("/examples1")
 
322
  with open("templates/charts/handle_sentiment_timesteps.html") as f:
323
  html = f.read()
324
  return HTMLResponse(content=html)