Roozeec committed on
Commit
60f5e0a
1 Parent(s): 4081ce7

updated news parsing

Browse files
Files changed (2) hide show
  1. app.py +12 -16
  2. wna_googlenews.py +1 -2
app.py CHANGED
@@ -46,37 +46,33 @@ with st.sidebar:
46
 
47
 
48
  if st.button("Search"):
 
49
  # display a loading progress
50
  with st.spinner("Loading last news ..."):
51
- classifier = pipeline(task="text-classification", model=settings["model"], top_k=None)
52
- df = wna.get_news(settings, query)
53
  with st.spinner("Processing received news ..."):
54
- # st.dataframe(df)
55
- # get each title colums
56
- sentences = df["title"]
57
- # convert into array
58
- sentences = sentences.tolist()
59
- # st.write(sentences)
60
- # create new dataframe
61
- df = pd.DataFrame(columns=["sentence", "best","second"])
62
  # loop on each sentence and call classifier
63
- for sentence in sentences:
64
- cur_sentence = sentence
65
- model_outputs = classifier(sentence)
 
 
66
  cur_result = model_outputs[0]
67
  #st.write(cur_result)
68
  # get label 1
69
  label = cur_result[0]['label']
70
  score = cur_result[0]['score']
71
  percentage = round(score * 100, 2)
72
- str1 = label + " " + str(percentage)
73
  # get label 2
74
  label = cur_result[1]['label']
75
  score = cur_result[1]['score']
76
  percentage = round(score * 100, 2)
77
- str2 = label + " " + str(percentage)
78
  # insert cur_sentence and cur_result into dataframe
79
- df.loc[len(df.index)] = [cur_sentence, str1, str2]
80
 
81
  # write info on the output
82
  st.write("Number of sentences:", len(df))
 
46
 
47
 
48
  if st.button("Search"):
49
+ classifier = pipeline(task="text-classification", model=settings["model"], top_k=None)
50
  # display a loading progress
51
  with st.spinner("Loading last news ..."):
52
+ allnews = wna.get_news(settings, query)
53
+ st.dataframe(allnews)
54
  with st.spinner("Processing received news ..."):
55
+ df = pd.DataFrame(columns=["sentence", "date","best","second"])
 
 
 
 
 
 
 
56
  # loop on each sentence and call classifier
57
+ for curnews in allnews:
58
+ #st.write(curnews)
59
+ cur_sentence = curnews["title"]
60
+ cur_date = curnews["date"]
61
+ model_outputs = classifier(cur_sentence)
62
  cur_result = model_outputs[0]
63
  #st.write(cur_result)
64
  # get label 1
65
  label = cur_result[0]['label']
66
  score = cur_result[0]['score']
67
  percentage = round(score * 100, 2)
68
+ str1 = label + " (" + str(percentage) + ")%"
69
  # get label 2
70
  label = cur_result[1]['label']
71
  score = cur_result[1]['score']
72
  percentage = round(score * 100, 2)
73
+ str2 = label + " (" + str(percentage) + ")%"
74
  # insert cur_sentence and cur_result into dataframe
75
+ df.loc[len(df.index)] = [cur_sentence, cur_date, str1, str2]
76
 
77
  # write info on the output
78
  st.write("Number of sentences:", len(df))
wna_googlenews.py CHANGED
@@ -19,5 +19,4 @@ def get_news(settings, query):
19
  page_result = googlenews.page_at(page)
20
  # merge dat
21
  final_list = final_list + page_result
22
- df = pd.DataFrame(final_list)
23
- return df
 
19
  page_result = googlenews.page_at(page)
20
  # merge dat
21
  final_list = final_list + page_result
22
+ return final_list