abnerguzman committed on
Commit
6584d90
1 Parent(s): d5bada5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -13
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
 
3
  from datetime import datetime
 
4
  from io import StringIO
5
  import requests
6
 
@@ -9,10 +10,10 @@ import time
9
 
10
  import demo as du
11
 
12
- endpoint_url = "https://mldevattrib.poc.prorata.ai/"
 
13
 
14
-
15
- def output_credit_dist(msg, cur_idx, _out_credit, _out_claims):
16
  print(f'Start output_credit_dist at {datetime.now()}.')
17
  start_time = time.perf_counter()
18
  print(f'Target is ```{msg}```')
@@ -21,8 +22,8 @@ def output_credit_dist(msg, cur_idx, _out_credit, _out_claims):
21
  _out_claims.truncate(0)
22
  _out_claims.seek(0)
23
 
24
- print(du.style_str, file=_out_credit)
25
- print(du.style_str, file=_out_claims)
26
 
27
  atoms_l, atom_topkmatches_l, credit_l = [], [], []
28
 
@@ -37,7 +38,12 @@ def output_credit_dist(msg, cur_idx, _out_credit, _out_claims):
37
  credit_dist = response['credit_dist']
38
 
39
  if atoms_l:
40
- url_to_supporting_cid_ctext_tuples = du.get_url_to_supporting_cid_ctext_tuples(atom_support_l)
 
 
 
 
 
41
  url_to_title = {}
42
  for atom_topkmatches in atom_topkmatches_l:
43
  for match in atom_topkmatches:
@@ -59,16 +65,30 @@ def output_credit_dist(msg, cur_idx, _out_credit, _out_claims):
59
  print(f"<div class=\"doc-title\">No sources were found that strongly support this target.</div>", file=_out_credit)
60
  print(f"</div>", file=_out_credit)
61
 
62
- for url, w in credit_l:
63
- # match_text = chunk_separator.join([x[1] for x in url_to_supporting_cid_ctext_tuples[url]])
64
- match_text = du.format_chunk_texts_for_display(url_to_supporting_cid_ctext_tuples[url])
65
- print(f"{url} cids: {[x[0] for x in url_to_supporting_cid_ctext_tuples[url]]}")
 
 
 
 
66
 
 
 
 
 
 
 
 
67
  print(f"<div>", file=_out_credit)
68
  favicon = f"<img src=\"https://www.google.com/s2/favicons?sz=128&amp;domain={urlparse(url).netloc}\"/>"
69
  print(f"<div class=\"doc-title\">{favicon}&nbsp&nbsp;{url_to_title[url]}<score>{100*w:.0f}%</score></div>", file=_out_credit)
70
  print(f"<div class=\"doc-url\"><a href=\"{url}\" target=\"_blank\">{url}</a></div>", file=_out_credit)
71
- print(f"<div class=\"doc-text\">{match_text}</div>", file=_out_credit)
 
 
 
72
  print(f"</div>", file=_out_credit)
73
 
74
  print(f"<div>", file=_out_claims)
@@ -88,7 +108,14 @@ def output_credit_dist(msg, cur_idx, _out_credit, _out_claims):
88
  print(f"<div class=\"claim-determination\"><strong>Determination:</strong> {'Supported' if aggmatch_determination['true'] else 'NOT supported'}.</div>", file=_out_claims)
89
  print(f"<div class=\"claim-text\"><strong>Rationale:</strong> {aggmatch_determination['rationale']}</div>", file=_out_claims)
90
 
91
- for cid, ctext in zip(aggmatch_determination['id_l'], aggmatch_determination['chunk_text_l']):
 
 
 
 
 
 
 
92
  print(f"<div class=\"claim-text\"><strong>Chunk {cid}:</strong> {ctext}</div>", file=_out_claims)
93
 
94
  print(f"</div>", file=_out_claims)
@@ -114,7 +141,7 @@ with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
114
  results_box = gr.HTML(label='Matches')
115
  toggle = gr.Button("")
116
 
117
- msg.submit(output_credit_dist, [msg, cur_idx_var, _out_credit_var, _out_claims_var], [toggle, results_box], queue=False)
118
  toggle.click(toggle_output, [cur_idx_var, _out_credit_var, _out_claims_var], [toggle, results_box], queue=False)
119
 
120
  results_box.change(None, scroll_to_output=True)
 
1
  import gradio as gr
2
 
3
  from datetime import datetime
4
+ import concurrent.futures
5
  from io import StringIO
6
  import requests
7
 
 
10
 
11
  import demo as du
12
 
13
+ endpoint_url = "https://d34hcsxnegbpcslxzqsmesvr7m0ljtuz.lambda-url.us-west-2.on.aws/"
14
+ executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
15
 
16
+ def output_credit_dist_nollm(msg, cur_idx, _out_credit, _out_claims):
 
17
  print(f'Start output_credit_dist at {datetime.now()}.')
18
  start_time = time.perf_counter()
19
  print(f'Target is ```{msg}```')
 
22
  _out_claims.truncate(0)
23
  _out_claims.seek(0)
24
 
25
+ print(du.style2_str, file=_out_credit)
26
+ print(du.style2_str, file=_out_claims)
27
 
28
  atoms_l, atom_topkmatches_l, credit_l = [], [], []
29
 
 
38
  credit_dist = response['credit_dist']
39
 
40
  if atoms_l:
41
+ (
42
+ url_to_cid_to_ctext_map,
43
+ url_to_cid_to_ctext_formatted_map,
44
+ url_to_cid_to_nquotes_map,
45
+ ) = du.create_url_to_cid_to_ctext_formatted_map(atom_support_l)
46
+
47
  url_to_title = {}
48
  for atom_topkmatches in atom_topkmatches_l:
49
  for match in atom_topkmatches:
 
65
  print(f"<div class=\"doc-title\">No sources were found that strongly support this target.</div>", file=_out_credit)
66
  print(f"</div>", file=_out_credit)
67
 
68
+ url_to_body_w_credit = {}
69
+ futures = []
70
+ for url, _ in credit_l:
71
+ futures.append(executor.submit(du.get_article_from_url, url))
72
+ for f in futures:
73
+ article = f.result()
74
+ url_to_body_w_credit[article['url']] = article['text']
75
+ du.print_w_time_elapsed(f'Got url bodies', start_time)
76
 
77
+ for url, w in credit_l:
78
+ match_text = du.format_chunk_texts_for_display3(
79
+ url,
80
+ url_to_cid_to_ctext_map[url],
81
+ url_to_cid_to_ctext_formatted_map[url],
82
+ url_to_cid_to_nquotes_map[url],
83
+ )
84
  print(f"<div>", file=_out_credit)
85
  favicon = f"<img src=\"https://www.google.com/s2/favicons?sz=128&amp;domain={urlparse(url).netloc}\"/>"
86
  print(f"<div class=\"doc-title\">{favicon}&nbsp&nbsp;{url_to_title[url]}<score>{100*w:.0f}%</score></div>", file=_out_credit)
87
  print(f"<div class=\"doc-url\"><a href=\"{url}\" target=\"_blank\">{url}</a></div>", file=_out_credit)
88
+ print(f"<div class=\"doc-text-wrapper\">", file=_out_credit)
89
+ print(f"<div class=\"doc-text-left\">{url_to_body_w_credit[url]}</div>", file=_out_credit)
90
+ print(f"<div class=\"doc-text-right\">{match_text}</div>", file=_out_credit)
91
+ print(f"</div>", file=_out_credit)
92
  print(f"</div>", file=_out_credit)
93
 
94
  print(f"<div>", file=_out_claims)
 
108
  print(f"<div class=\"claim-determination\"><strong>Determination:</strong> {'Supported' if aggmatch_determination['true'] else 'NOT supported'}.</div>", file=_out_claims)
109
  print(f"<div class=\"claim-text\"><strong>Rationale:</strong> {aggmatch_determination['rationale']}</div>", file=_out_claims)
110
 
111
+ # if aggmatch_determination.get('quote_matches_l', None):
112
+ # for qid, qtext in enumerate(aggmatch_determination['quote_matches_l']):
113
+ # print(f"<div class=\"claim-text\"><strong>Quote {qid}:</strong> {qtext}</div>", file=_out_claims)
114
+
115
+ use_formatted = 'chunk_text_formatted_l' in aggmatch_determination and aggmatch_determination['chunk_text_formatted_l']
116
+ chunk_text_l_key = 'chunk_text_formatted_l' if use_formatted else 'chunk_text_l'
117
+
118
+ for cid, ctext in zip(aggmatch_determination['id_l'], aggmatch_determination[chunk_text_l_key]):
119
  print(f"<div class=\"claim-text\"><strong>Chunk {cid}:</strong> {ctext}</div>", file=_out_claims)
120
 
121
  print(f"</div>", file=_out_claims)
 
141
  results_box = gr.HTML(label='Matches')
142
  toggle = gr.Button("")
143
 
144
+ msg.submit(output_credit_dist_nollm, [msg, cur_idx_var, _out_credit_var, _out_claims_var], [toggle, results_box], queue=False)
145
  toggle.click(toggle_output, [cur_idx_var, _out_credit_var, _out_claims_var], [toggle, results_box], queue=False)
146
 
147
  results_box.change(None, scroll_to_output=True)