# NOTE: web-page residue captured with this file ("Spaces: Sleeping");
# it is not part of the program.
import concurrent.futures
import html
import time
from datetime import datetime
from io import StringIO
from urllib.parse import urlparse

import gradio as gr
import requests

import demo as du
# URL that output_credit_dist_nollm queries for the attribution analysis.
endpoint_url = "https://d34hcsxnegbpcslxzqsmesvr7m0ljtuz.lambda-url.us-west-2.on.aws/"
# Shared thread pool used to download article bodies concurrently.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
# (connect, read) timeout in seconds for the attribution endpoint.
# NOTE(review): the read timeout is deliberately generous so a slow analysis
# is not cut off; tighten it to the backend's real limit.
_ENDPOINT_TIMEOUT_S = (10, 900)


def _reset_buffer(buf):
    """Empty *buf* and rewind it so the next write starts at position 0."""
    buf.truncate(0)
    buf.seek(0)


def _fetch_attribution(msg):
    """Send *msg* to the attribution endpoint and return the decoded JSON."""
    params = {"msg": msg, "max_workers": 10}
    response = requests.get(endpoint_url, json=params, timeout=_ENDPOINT_TIMEOUT_S)
    # Fail with a clear HTTP error instead of a KeyError on an error payload.
    response.raise_for_status()
    return response.json()


def _write_notice(text, out):
    """Write a one-line notice styled like a document title to *out*."""
    print("<div>", file=out)
    print(f"<div class=\"doc-title\">{text}</div>", file=out)
    print("</div>", file=out)


def _write_credit_entry(url, weight, title, body, match_text, out):
    """Write the attribution card for one source URL to *out*."""
    # Title and URL come from fetched web pages, so escape them before
    # placing them in markup / attributes.
    safe_url = html.escape(url, quote=True)
    safe_title = html.escape(title)
    safe_domain = html.escape(urlparse(url).netloc, quote=True)
    favicon = f"<img src=\"https://www.google.com/s2/favicons?sz=128&domain={safe_domain}\"/>"
    print("<div>", file=out)
    print(f"<div class=\"doc-title\">{favicon}  {safe_title}<score>{100*weight:.0f}%</score></div>", file=out)
    print(f"<div class=\"doc-url\"><a href=\"{safe_url}\" target=\"_blank\">{safe_url}</a></div>", file=out)
    print("<div class=\"doc-text-wrapper\">", file=out)
    # NOTE(review): body and match_text are inserted verbatim because they
    # presumably already carry display markup - confirm they are sanitised
    # upstream.
    print(f"<div class=\"doc-text-left\">{body}</div>", file=out)
    print(f"<div class=\"doc-text-right\">{match_text}</div>", file=out)
    print("</div>", file=out)
    print("</div>", file=out)


def _write_claims(atoms_l, atom_support_l, url_to_title, out):
    """Write the per-claim support breakdown to *out*."""
    print("<div>", file=out)
    print("<div class=\"section-title\">Breakdown of article support for each extracted claim</div>", file=out)
    for j, atom_support in enumerate(atom_support_l):
        n_urls = len(atom_support)
        n_support = sum(1 for determination in atom_support.values() if determination['true'])
        claim = html.escape(atoms_l[j])
        print(f"<div class=\"claim-header\"><strong>Claim {j+1} ({n_support}/{n_urls}):</strong> \"{claim}\"</div>", file=out)
        for url, aggmatch_determination in atom_support.items():
            safe_url = html.escape(url, quote=True)
            # Fall back to the URL when no match supplied a title for it.
            title = html.escape(url_to_title.get(url, url))
            print(f"<div class=\"claim-doc-title\">{title}</div>", file=out)
            print(f"<div class=\"claim-doc-url\"><a href=\"{safe_url}\" target=\"_blank\">{safe_url}</a></div>", file=out)
            print(f"<div class=\"claim-determination\"><strong>Determination:</strong> {'Supported' if aggmatch_determination['true'] else 'NOT supported'}.</div>", file=out)
            # NOTE(review): rationale and chunk texts are written verbatim;
            # confirm upstream that they are safe to embed as HTML.
            print(f"<div class=\"claim-text\"><strong>Rationale:</strong> {aggmatch_determination['rationale']}</div>", file=out)
            # Prefer the formatted chunk texts when present and non-empty.
            use_formatted = bool(aggmatch_determination.get('chunk_text_formatted_l'))
            chunk_text_l_key = 'chunk_text_formatted_l' if use_formatted else 'chunk_text_l'
            for cid, ctext in zip(aggmatch_determination['id_l'], aggmatch_determination[chunk_text_l_key]):
                print(f"<div class=\"claim-text\"><strong>Chunk {cid}:</strong> {ctext}</div>", file=out)
    print("</div>", file=out)


def output_credit_dist_nollm(msg, cur_idx, _out_credit, _out_claims):
    """Build the attribution view and the claim-breakdown view for *msg*.

    Both buffers are emptied and refilled on every call. Messages of ten
    characters or fewer are not sent to the endpoint and produce the
    "no sources" notice.

    Args:
        msg: Target text entered by the user.
        cur_idx: One-element list used as a mutable view counter; reset to 0
            when results were rendered.
        _out_credit: Text buffer that receives the attribution HTML.
        _out_claims: Text buffer that receives the claim-breakdown HTML.

    Returns:
        A ``(button_label, html)`` pair. The label is ``''`` when no matches
        were found and ``'Show claim breakdown'`` otherwise; ``html`` is the
        content of ``_out_credit``.

    Raises:
        requests.RequestException: If the endpoint call fails or returns an
            HTTP error status.
    """
    print(f'Start output_credit_dist at {datetime.now()}.')
    start_time = time.perf_counter()
    print(f'Target is ```{msg}```')

    for buf in (_out_credit, _out_claims):
        _reset_buffer(buf)
        print(du.style2_str, file=buf)

    # Bind every result up front so no later path hits an unbound name.
    atoms_l, atom_topkmatches_l, atom_support_l, credit_l = [], [], [], []
    url_to_title = {}
    support_maps = None
    if len(msg) > 10:
        response = _fetch_attribution(msg)
        atoms_l = response['atoms_l']
        atom_topkmatches_l = response['atom_topkmatches_l']
        atom_support_l = response['atom_support_l']
        credit_dist = response['credit_dist']
        if atoms_l:
            support_maps = du.create_url_to_cid_to_ctext_formatted_map(atom_support_l)
            for atom_topkmatches in atom_topkmatches_l:
                for match in atom_topkmatches:
                    metadata = match['metadata']
                    url_to_title[metadata['url']] = metadata['title']
            # Highest credit first.
            credit_l = sorted(credit_dist.items(), key=lambda item: item[1], reverse=True)
    du.print_w_time_elapsed('Computed credit_l', start_time)

    if not atom_topkmatches_l:
        _write_notice("No sources were found that are relevant this target.", _out_credit)
        du.print_w_time_elapsed(f'End output_credit_dist (no matches) at {datetime.now()}', start_time)
        return '', _out_credit.getvalue()

    if not credit_l:
        _write_notice("No sources were found that strongly support this target.", _out_credit)

    # Download all article bodies concurrently, then collect them in order.
    futures = [executor.submit(du.get_article_from_url, url) for url, _ in credit_l]
    url_to_body_w_credit = {}
    for (url, _), future in zip(credit_l, futures):
        # Key by the URL we asked for: the article's own 'url' field may be
        # normalised differently, which would break the lookup below.
        url_to_body_w_credit[url] = future.result()['text']
    du.print_w_time_elapsed('Got url bodies', start_time)

    if credit_l:
        # A non-empty credit_l implies the support maps were built above.
        (
            url_to_cid_to_ctext_map,
            url_to_cid_to_ctext_formatted_map,
            url_to_cid_to_nquotes_map,
        ) = support_maps
    for url, w in credit_l:
        match_text = du.format_chunk_texts_for_display3(
            url,
            url_to_cid_to_ctext_map[url],
            url_to_cid_to_ctext_formatted_map[url],
            url_to_cid_to_nquotes_map[url],
        )
        _write_credit_entry(
            url,
            w,
            url_to_title.get(url, url),
            url_to_body_w_credit[url],
            match_text,
            _out_credit,
        )

    _write_claims(atoms_l, atom_support_l, url_to_title, _out_claims)

    cur_idx[0] = 0
    du.print_w_time_elapsed(f'End output_credit_dist at {datetime.now()}', start_time)
    return 'Show claim breakdown', _out_credit.getvalue()
def toggle_output(cur_idx, _out_credit, _out_claims):
    """Switch the results pane between the attribution and claim views.

    Args:
        cur_idx: One-element list used as a mutable click counter. A negative
            value means there is nothing to toggle and is left untouched.
        _out_credit: Text buffer holding the attribution HTML.
        _out_claims: Text buffer holding the claim-breakdown HTML.

    Returns:
        A ``(button_label, html)`` pair. Even counter values show the
        attribution view, odd values show the claim breakdown.
    """
    if cur_idx[0] < 0:
        # This handler feeds two outputs (button label and HTML), so always
        # return a pair; keep showing the attribution buffer.
        return '', _out_credit.getvalue()
    cur_idx[0] += 1
    if cur_idx[0] % 2 == 0:
        return 'Show claim breakdown', _out_credit.getvalue()
    return 'Back to attribution', _out_claims.getvalue()
# UI definition: a target textbox, an HTML results pane and a toggle button.
with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
    # The StringIO class itself (a callable) is passed, not an instance.
    # NOTE(review): confirm against the Gradio State docs that a callable
    # value yields a fresh buffer when the app loads.
    _out_credit_var = gr.State(StringIO)
    _out_claims_var = gr.State(StringIO)
    # One-element list so the handlers can update the counter in place.
    cur_idx_var = gr.State([0])
    msg = gr.Textbox(label='Target')
    results_box = gr.HTML(label='Matches')
    toggle = gr.Button("")
    # Submitting the textbox rebuilds both views and shows the attribution.
    msg.submit(
        output_credit_dist_nollm,
        [msg, cur_idx_var, _out_credit_var, _out_claims_var],
        [toggle, results_box],
        queue=False,
    )
    # The button flips between the two views built by the last submit.
    toggle.click(
        toggle_output,
        [cur_idx_var, _out_credit_var, _out_claims_var],
        [toggle, results_box],
        queue=False,
    )
    results_box.change(None, scroll_to_output=True)
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.queue()
    demo.launch()