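# --- Web file-viewer page header captured along with the source (not program code) ---
# abnerguzman's picture
# Update app.py
# 6584d90 verified
# raw
# history blame contribute delete
# No virus
# 6.77 kB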
import gradio as gr
from datetime import datetime
import concurrent.futures
from io import StringIO
import requests
from urllib.parse import urlparse
import time
import demo as du
# HTTP endpoint that output_credit_dist_nollm queries (GET with a JSON body)
# to obtain the extracted claims, their matches, and the credit distribution.
endpoint_url = "https://d34hcsxnegbpcslxzqsmesvr7m0ljtuz.lambda-url.us-west-2.on.aws/"
# Shared thread pool used to download the article bodies of credited URLs
# concurrently (see the executor.submit call in output_credit_dist_nollm).
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
def _fetch_article_bodies(urls):
    """Download every article in parallel and map each requested URL to its text."""
    # Submit everything first so the downloads overlap, then collect in order.
    pending = [(url, executor.submit(du.get_article_from_url, url)) for url in urls]
    # Key by the URL that was requested (not the 'url' field of the result):
    # the rendering code looks bodies up by the requested URL, so this cannot
    # miss even if the fetcher reports a different URL.
    return {url: future.result()['text'] for url, future in pending}


def _write_credit_cards(out, credit_l, url_to_title, url_to_body,
                        cid_to_ctext, cid_to_ctext_formatted, cid_to_nquotes):
    """Write one HTML card per credited URL (title, score, body, matched chunks) to *out*."""
    for url, w in credit_l:
        match_text = du.format_chunk_texts_for_display3(
            url,
            cid_to_ctext[url],
            cid_to_ctext_formatted[url],
            cid_to_nquotes[url],
        )
        # Fall back to the URL itself when no match metadata supplied a title.
        title = url_to_title.get(url, url)
        favicon = f'<img src="https://www.google.com/s2/favicons?sz=128&amp;domain={urlparse(url).netloc}"/>'
        print('<div>', file=out)
        print(f'<div class="doc-title">{favicon}&nbsp&nbsp;{title}<score>{100*w:.0f}%</score></div>', file=out)
        print(f'<div class="doc-url"><a href="{url}" target="_blank">{url}</a></div>', file=out)
        print('<div class="doc-text-wrapper">', file=out)
        print(f'<div class="doc-text-left">{url_to_body[url]}</div>', file=out)
        print(f'<div class="doc-text-right">{match_text}</div>', file=out)
        print('</div>', file=out)
        print('</div>', file=out)


def _write_claim_breakdown(out, atoms_l, atom_support_l, url_to_title):
    """Write the per-claim support breakdown (determination, rationale, chunks) to *out*."""
    print('<div>', file=out)
    print('<div class="section-title">Breakdown of article support for each extracted claim</div>', file=out)
    for j, atom_support in enumerate(atom_support_l):
        n_urls = len(atom_support)
        n_support = sum(1 for determination in atom_support.values() if determination['true'])
        print(f'<div class="claim-header"><strong>Claim {j+1} ({n_support}/{n_urls}):</strong> "{atoms_l[j]}"</div>', file=out)
        for url, aggmatch_determination in atom_support.items():
            title = url_to_title.get(url, url)
            verdict = 'Supported' if aggmatch_determination['true'] else 'NOT supported'
            print(f'<div class="claim-doc-title">{title}</div>', file=out)
            print(f'<div class="claim-doc-url"><a href="{url}" target="_blank">{url}</a></div>', file=out)
            print(f'<div class="claim-determination"><strong>Determination:</strong> {verdict}.</div>', file=out)
            print(f'<div class="claim-text"><strong>Rationale:</strong> {aggmatch_determination["rationale"]}</div>', file=out)
            # Prefer the pre-formatted chunk texts when present and non-empty.
            if aggmatch_determination.get('chunk_text_formatted_l'):
                chunk_texts = aggmatch_determination['chunk_text_formatted_l']
            else:
                chunk_texts = aggmatch_determination['chunk_text_l']
            for cid, ctext in zip(aggmatch_determination['id_l'], chunk_texts):
                print(f'<div class="claim-text"><strong>Chunk {cid}:</strong> {ctext}</div>', file=out)
    print('</div>', file=out)


def output_credit_dist_nollm(msg, cur_idx, _out_credit, _out_claims):
    """Query the attribution endpoint for *msg* and render the results as HTML.

    Both buffers are cleared and refilled: ``_out_credit`` receives the
    per-source attribution view and ``_out_claims`` the per-claim breakdown.

    Args:
        msg: Target text. Only messages longer than 10 characters are sent to
            the endpoint; shorter ones are treated as having no matches.
        cur_idx: One-element list holding the toggle counter; reset to 0 when
            results are rendered.
        _out_credit: Text buffer (file-like, supports truncate/seek/getvalue)
            for the attribution HTML.
        _out_claims: Text buffer for the claim-breakdown HTML.

    Returns:
        ``(toggle_label, html)``: the label for the toggle button (empty when
        there is nothing to toggle to) and the attribution HTML.
    """
    print(f'Start output_credit_dist at {datetime.now()}.')
    start_time = time.perf_counter()
    print(f'Target is ```{msg}```')

    # Start both buffers from scratch, each beginning with the shared style block.
    for buf in (_out_credit, _out_claims):
        buf.truncate(0)
        buf.seek(0)
        print(du.style2_str, file=buf)

    # Defaults so every name used below is bound even when the endpoint is
    # skipped or returns matches without any extracted claims.
    atoms_l, atom_topkmatches_l, atom_support_l, credit_l = [], [], [], []
    url_to_title = {}
    url_to_cid_to_ctext_map = {}
    url_to_cid_to_ctext_formatted_map = {}
    url_to_cid_to_nquotes_map = {}

    if len(msg) > 10:
        params = {"msg": msg, "max_workers": 10}
        # NOTE(review): no timeout is set, so a stalled endpoint blocks this
        # handler indefinitely; choose a value once backend latency is known.
        response = requests.get(endpoint_url, json=params).json()
        atoms_l = response['atoms_l']
        atom_topkmatches_l = response['atom_topkmatches_l']
        atom_support_l = response['atom_support_l']
        credit_dist = response['credit_dist']

        if atoms_l:
            (
                url_to_cid_to_ctext_map,
                url_to_cid_to_ctext_formatted_map,
                url_to_cid_to_nquotes_map,
            ) = du.create_url_to_cid_to_ctext_formatted_map(atom_support_l)

            for atom_topkmatches in atom_topkmatches_l:
                for match in atom_topkmatches:
                    url_to_title[match['metadata']['url']] = match['metadata']['title']

            # Highest-credit sources first.
            credit_l = sorted(credit_dist.items(), key=lambda x: x[1], reverse=True)
            du.print_w_time_elapsed('Computed credit_l', start_time)

    if not atom_topkmatches_l:
        print('<div>', file=_out_credit)
        print('<div class="doc-title">No sources were found that are relevant this target.</div>', file=_out_credit)
        print('</div>', file=_out_credit)
        du.print_w_time_elapsed(f'End output_credit_dist (no matches) at {datetime.now()}', start_time)
        return '', _out_credit.getvalue()

    if not credit_l:
        # Matches exist but none earned credit; the claim breakdown below is
        # still rendered so the user can see why.
        print('<div>', file=_out_credit)
        print('<div class="doc-title">No sources were found that strongly support this target.</div>', file=_out_credit)
        print('</div>', file=_out_credit)

    url_to_body_w_credit = _fetch_article_bodies([url for url, _ in credit_l])
    du.print_w_time_elapsed('Got url bodies', start_time)

    _write_credit_cards(
        _out_credit,
        credit_l,
        url_to_title,
        url_to_body_w_credit,
        url_to_cid_to_ctext_map,
        url_to_cid_to_ctext_formatted_map,
        url_to_cid_to_nquotes_map,
    )
    _write_claim_breakdown(_out_claims, atoms_l, atom_support_l, url_to_title)

    # Fresh results always start on the attribution view.
    cur_idx[0] = 0
    du.print_w_time_elapsed(f'End output_credit_dist at {datetime.now()}', start_time)
    return 'Show claim breakdown', _out_credit.getvalue()
def toggle_output(cur_idx, _out_credit, _out_claims):
    """Flip the results panel between the attribution view and the claim breakdown.

    Args:
        cur_idx: One-element list holding the toggle counter. A negative value
            means toggling is disabled; otherwise it is incremented in place
            and its parity selects the view.
        _out_credit: Buffer holding the attribution HTML (needs ``getvalue()``).
        _out_claims: Buffer holding the claim-breakdown HTML.

    Returns:
        ``(button_label, html)`` -- always two values, because this handler is
        wired to two output components.
    """
    if cur_idx[0] < 0:
        # Toggling disabled: leave the counter alone and keep showing the
        # attribution view. A bare '' here would supply only one of the two
        # outputs the click handler is registered with.
        return '', _out_credit.getvalue()

    cur_idx[0] += 1
    if cur_idx[0] % 2 == 0:
        # Even count -> back on the attribution view.
        return 'Show claim breakdown', _out_credit.getvalue()
    # Odd count -> claim breakdown.
    return 'Back to attribution', _out_claims.getvalue()
with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
    # State shared between the two handlers. The StringIO *class* (not an
    # instance) is passed as the initial value -- presumably so Gradio calls
    # it to create a fresh buffer per session; TODO confirm against the
    # installed Gradio version.
    _out_credit_var = gr.State(StringIO)
    _out_claims_var = gr.State(StringIO)
    # One-element list used as a mutable toggle counter.
    cur_idx_var = gr.State([0])

    # Visible components, in display order.
    msg = gr.Textbox(label='Target')
    results_box = gr.HTML(label='Matches')
    toggle = gr.Button("")

    # Submitting the textbox runs the attribution query; it updates both the
    # toggle button's label and the results panel.
    msg.submit(
        output_credit_dist_nollm,
        inputs=[msg, cur_idx_var, _out_credit_var, _out_claims_var],
        outputs=[toggle, results_box],
        queue=False,
    )
    # Clicking the button swaps between the two pre-rendered views.
    toggle.click(
        toggle_output,
        inputs=[cur_idx_var, _out_credit_var, _out_claims_var],
        outputs=[toggle, results_box],
        queue=False,
    )
    # No callback: only scroll the results panel into view when it changes.
    results_box.change(fn=None, scroll_to_output=True)

if __name__ == "__main__":
    demo.queue()
    demo.launch()