File size: 3,849 Bytes
ef95c3f
 
 
239e7f0
5a59d36
b624fdb
bb15fb1
 
ef95c3f
 
 
 
d830e0f
ef95c3f
 
17675d2
ef95c3f
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b28ca
ef95c3f
e3b28ca
ef95c3f
 
 
8cfe76c
ef95c3f
8cfe76c
ef95c3f
 
 
 
45040a9
8d03248
26f12e6
 
 
2c76f79
f1e0495
ef95c3f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import vllm
import torch
import gradio
import huggingface_hub
import os

# Authenticate with the Hugging Face Hub using the token from the HF_TOKEN
# environment variable (raises KeyError at import time if it is unset).
huggingface_hub.login(token=os.environ["HF_TOKEN"], add_to_git_credential=True)

# Flagging callback handed to the Gradio interface; it is constructed with a
# separate token (HF_WRITE_TOKEN) and the dataset name "fava-flagged-demo".
hf_writer = gradio.HuggingFaceDatasetSaver(os.environ["HF_WRITE_TOKEN"], "fava-flagged-demo")

# Fava prompt: `{evidence}` receives the reference text and `{output}` the
# passage to be checked. The trailing "[Edited] " is where generation starts.
INPUT = (
    "Read the following references:\n{evidence}\n"
    "Please identify all the errors in the following text using the "
    "information in the references provided and suggest edits if necessary:\n"
    "[Text] {output}\n[Edited] "
)

# Loaded once at import time and shared by every call to `result`.
model = vllm.LLM(model="fava-uw/fava-model")
# Ordered (tag, replacement) pairs turning FAVA's edit/error-type tags into
# inline-styled HTML. Edit tags (<mark>/<delete>) wrap their content in a
# span; error-type opening tags become a coloured label, and only the
# "unverifiable" and "subjective" types underline the text that follows.
_FAVA_TAG_HTML = (
    ("<mark>", "<span style='color: green; font-weight: bold;'> "),
    ("</mark>", " </span>"),
    ("<delete>", "<span style='color: red; text-decoration: line-through;'>"),
    ("</delete>", "</span>"),
    ("<entity>", "<span style='background-color: #E9A2D9; border-bottom: 1px dotted;'>entity</span>"),
    ("<relation>", "<span style='background-color: #F3B78B; border-bottom: 1px dotted;'>relation</span>"),
    ("<contradictory>", "<span style='background-color: #FFFF9B; border-bottom: 1px dotted;'>contradictory</span>"),
    ("<unverifiable>", "<span style='background-color: #D3D3D3; border-bottom: 1px dotted;'>unverifiable</span><u>"),
    ("<invented>", "<span style='background-color: #BFE9B9; border-bottom: 1px dotted;'>invented</span>"),
    ("<subjective>", "<span style='background-color: #D3D3D3; border-bottom: 1px dotted;'>subjective</span><u>"),
    ("</entity>", ""),
    ("</relation>", ""),
    ("</contradictory>", ""),
    ("</unverifiable>", "</u>"),
    ("</invented>", ""),
    ("</subjective>", "</u>"),
)


def _fava_output_to_html(text):
    """Convert raw FAVA model text into an HTML fragment.

    The text is HTML-escaped first, so any markup other than the known FAVA
    tags (for example markup echoed from the user's passage) is shown
    literally instead of being interpreted by the browser. The known tags are
    then swapped for their styled HTML and the literal "Edited:" marker is
    removed.
    """
    import html  # local import keeps this block self-contained

    # quote=False: the text lands in element content, never in an attribute.
    rendered = html.escape(text, quote=False)
    for tag, replacement in _FAVA_TAG_HTML:
        # After escaping, "<mark>" appears in the text as "&lt;mark&gt;".
        rendered = rendered.replace(html.escape(tag, quote=False), replacement)
    return rendered.replace("Edited:", "")


def result(passage, reference):
    """Run FAVA on a passage and return its annotated edits as HTML.

    Args:
        passage: Text to check for hallucinations.
        reference: Evidence text the passage is checked against.

    Returns:
        An HTML string: the model's edited passage with FAVA tags rendered as
        styled spans, wrapped in a normal-weight ``<div>``.
    """
    prompt = [INPUT.format_map({"evidence": reference, "output": passage})]
    print(prompt)
    # temperature=0 requests greedy decoding from vLLM; generation is capped
    # at 500 new tokens.
    sampling_params = vllm.SamplingParams(
        temperature=0,
        top_p=1.0,
        max_tokens=500,
    )
    generations = model.generate(prompt, sampling_params)
    # One prompt was submitted, so use the first completion of the first request.
    edited_text = generations[0].outputs[0].text
    return f'<div style="font-weight: normal;">{_fava_output_to_html(edited_text)}</div>'

if __name__ == "__main__":
    # HTML passed as the interface's `article`: the FAVA overview figure and
    # the error taxonomy figure, centred.
    article = """<center><img src='https://github.com/abhika-m/researchpapers/blob/main/fava.png?raw=true' width="650px"><img src='https://github.com/abhika-m/researchpapers/blob/main/taxonomy.png?raw=true' width="850px"></center>"""
    # Short blurb passed as the interface's `description`.
    description = """Given a passage and a reference, FAVA will detect and edit any hallucinations present in the passage. If you find any errors with FAVA's output, please flag it. For more information, check out our <a href="https://arxiv.org/abs/2401.06855" target='_blank'>paper</a>."""
    # One pre-filled example as [passage, reference]; the passage deliberately
    # disagrees with the reference (1886 vs. 1885) and adds unsupported claims.
    examples = [["Canada's oldest national park, Banff, was established in 1886. It recently won a Nature's Choice 2023 award for its beautiful mountainous terrain. It's the best national park ever.", 
                 "Banff National Park is Canada's oldest national park, established in 1885 as Rocky Mountains Park. Located in Alberta's Rocky Mountains, 110–180 kilometres (68–112 mi) west of Calgary, Banff encompasses 6,641 square kilometres (2,564 sq mi) of mountainous terrain."]]
    # Two text inputs (passage, reference) feed `result`, whose HTML output is
    # rendered as-is. Manual flagging hands flagged samples to `hf_writer`.
    demo = gradio.Interface(
        fn=result,
        inputs=["text", "text"],
        outputs="html",
        title="Fine-grained Hallucination Detection and Editing (FAVA)",
        description=description,
        article=article,
        examples=examples,
        allow_flagging="manual",
        flagging_options=["wrong detection", "wrong edit", "both wrong", "other"],
        flagging_callback=hf_writer,
    )
    demo.launch(share=True)