Spaces:
Runtime error
Runtime error
File size: 11,292 Bytes
109a4ff d6a6654 7f7868d 109a4ff 69425c0 109a4ff b4a5ab4 c01709b 80a7945 39f1e00 59cbc76 0a0f3eb 109a4ff d5c2739 4f0bd75 d5c2739 ae3ef14 4f0bd75 d5c2739 6c6e336 d5c2739 109a4ff 207f13b 109a4ff 207f13b 109a4ff 207f13b 109a4ff 207f13b 109a4ff 6e5e0b3 adbd551 59cbc76 bb49f2c adbd551 e960cef adbd551 e960cef adbd551 59cbc76 0a0f3eb 364b156 2d870ff adbd551 59cbc76 80a7945 8308898 364b156 109a4ff 364b156 59cbc76 80a7945 adbd551 39f1e00 adbd551 39f1e00 adbd551 0a0f3eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import gradio as gr
import pandas as pd
import evaluate
import theme
import re
# Load ./wiki_bio_gpt3_hallucination.csv into a DataFrame.
# NOTE(review): the same CSV is read into `df` again further down, before `df`
# is first used, so this early load looks redundant — confirm before removing.
df = pd.read_csv("./wiki_bio_gpt3_hallucination.csv")
# HTML heading rendered at the top of the "Maya" and "Mithya" tabs.
title = "<h1 style='text-align: center; color: #333333; font-size: 40px;'> Maya, Mithya and AI Hallucinations </h1>"
# Usage steps rendered under the heading of the "Maya" tab.
description = "1)Put your context (could be any topic)</br> 2) Create Questions, </br> 3) Summarize </br>4) Detect Hallucination ( differece between Summary and Context) ."
# Blurb rendered under the heading of the "Mithya" tab.
description2 = "Detect Hallucination using NLI ."
# HTML heading and blurb rendered at the top of the "Router-Chain-Branch" tab.
titleRAG = "<h1 style='text-align: center; color: #333333; font-size: 40px;'> Routing , Chaining and Branching of RAG documents </h1>"
description3 = " This (experimental) novel approach involves creating a router across multiple Rag documents, chaining them together, and incorporating branching for greater flexibility and adaptability."
import numpy as np
import pandas as pd
import ast
from openai import OpenAI
import os
import torch
import spacy
from selfcheckgpt.modeling_selfcheck import SelfCheckMQAG, SelfCheckBERTScore
import en_core_web_sm
# Load the en_core_web_sm pipeline from its installed package.
# NOTE(review): `nlp` is not referenced anywhere else in the visible code.
nlp = en_core_web_sm.load()
# Copy the API_TOKEN environment variable into OPENAI_API_KEY.
# Raises KeyError at import time when API_TOKEN is not set.
os.environ["OPENAI_API_KEY"]=os.environ['API_TOKEN']
# Client used by summarize(); presumably picks the key up from the
# environment variable set just above — confirm against the OpenAI SDK docs.
client = OpenAI()
# Fix the torch RNG seed.
torch.manual_seed(28)
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# NOTE(review): selfcheck_mqag and selfcheck_bertscore are constructed here but
# are not referenced anywhere else in the visible code.
selfcheck_mqag = SelfCheckMQAG(device=device)
selfcheck_bertscore = SelfCheckBERTScore()
#nlp = spacy.load("en_core_web_sm")
from selfcheckgpt.modeling_mqag import MQAG
# MQAG model shared by createQuestions(), detect() and detectRag().
mqag_model = MQAG(
    g1_model_type='race', # race (more abstractive), squad (more extractive)
    device=device
)
# Default text for the "Context" text areas: a paneer butter masala recipe.
# Typographic characters (the apostrophe in "It’s") are stored as real Unicode
# so they render correctly in the UI; any newlines are stripped from the text.
document = r"""This is the complete recipe cooking paneer butter masala.Heat 1 teaspoon of oil in a pan on medium heat. Once the oil is hot, add the bay leaf, cinnamon stick, cloves and saute for few seconds.Then add the the onion, garlic, ginger and saute for 2 to 3 minutes until the onion is translucent. Add the tomatoes and cashews and mix. Then add 1 cup of water.Cover the pan and cook on medium heat for 15 minutes.After 15 minutes, remove the pan from heat. Remove the bay leaf, cinnamon stick and cloves.Let the mixture cool down a bit and then transfer to a blender. It’s important to let it cool down a bit else it will all blow up from the mixer. Grind the masala to a smooth paste and set aside. To the same pan now add 2 tablespoons butter along with remaining 1 teaspoon oil on medium heat.Once the butter melts, add the red chili powder and the Kashmiri red chili powder and fry for few seconds. This will give the curry a nice red color. Then add the ground paste back into the pan along with the garam masala (start with 1/2 teaspoon and add the remaining 1/4 teaspoon at the end only if you feel like the curry needs that extra bit of garam masala), cardamom powder, sugar, salt and tomato paste (if using).Mix well and cook for 1-2 minutes.Then add the cream and mix. Add in the paneer and cook for 2 to 3 minutes on medium heat. Finally add crushed kasuri methi.Garnish paneer butter masala with cilantro and serve hot with naan or rice!""".replace("\n", "")
# Default text for the "Summary" text area: a one-sentence summary of the recipe.
summary = "Heat oil in a pan, sauté spices, onions, garlic, ginger, then tomatoes and cashews, cook with water, blend into a paste, melt butter, add chili powders, return paste to pan with spices, sugar, salt, tomato paste, cook, add cream, paneer, kasuri methi, garnish with cilantro, serve with naan or rice."
# Load the hallucination dataset; the code below reads its sent_scores_nli,
# gpt3_text, gpt3_sentences and annotation columns.
df = pd.read_csv("./wiki_bio_gpt3_hallucination.csv")
def compute_score_per_document(scores):
    """Return the mean of a stringified list of scores.

    Args:
        scores: String containing a Python literal (for example
            ``"[0.1, 0.9]"``) that evaluates to a sequence of numbers.

    Returns:
        The arithmetic mean of the parsed values, as a NumPy scalar.
    """
    parsed_values = ast.literal_eval(scores)
    return np.array(parsed_values).mean()
# Rank documents by their mean per-sentence NLI score, highest first.
df["average_score"] = df["sent_scores_nli"].apply(compute_score_per_document)
sorted_df = df.sort_values(by=["average_score"], ascending=False)

# Default value of the detection-threshold slider.
THRESHOLD = 0.5

# Dropdown entries: label -> (row index in the dataset, GPT-3 text).
# For each rank the "High" entry (from the top of the ranking) is inserted
# before the "Low" entry (from the bottom), so the first key is a high sample.
examples = {}
for rank in range(1, 4):
    for level, position in (("High", rank - 1), ("Low", -rank)):
        sample = sorted_df.iloc[[position]]
        examples[f"{level} hallucination sample {rank}"] = (
            sample.index[0],
            sample["gpt3_text"].values[0],
        )
def mirror(example):
    """Return the stored text for the sample named ``example``.

    Args:
        example: A key of the module-level ``examples`` dict.

    Returns:
        The second element of the ``examples`` entry (the sample's text).
    """
    entry = examples[example]
    return entry[1]
def evaluate(example, treshold):
    """Label each sentence of a sample and compare with the human annotation.

    NOTE: this definition rebinds the module-level name ``evaluate`` and so
    hides the ``evaluate`` package imported at the top of the file.

    Args:
        example: A key of the module-level ``examples`` dict; its first
            element is the row label looked up in ``sorted_df``.
        treshold: Score above which a sentence is predicted to be a
            hallucination (parameter name kept as spelled for callers).

    Returns:
        A 4-tuple ``(predicted_pct, predictions, labels, truth_pct)``:
        ``predictions`` and ``labels`` are lists of ``(sentence, tag)`` pairs
        with tag ``"hallucination"`` or ``"factual"``; the two percentages are
        strings such as ``"40%"`` giving the share of sentences tagged
        ``"hallucination"`` by the threshold and by the annotation.
    """
    index = examples[example][0]
    row = sorted_df.loc[index]

    # The dataset stores these columns as stringified Python lists.
    scores = ast.literal_eval(row["sent_scores_nli"])
    sentences = ast.literal_eval(row["gpt3_sentences"])
    annotations = ast.literal_eval(row["annotation"])

    predictions = []
    labels = []
    flagged_count = 0
    hallucinated_count = 0
    for score, sentence, annotation in zip(scores, sentences, annotations):
        if score > treshold:
            prediction = "hallucination"
            flagged_count += 1
        else:
            prediction = "factual"

        # Any annotation other than "accurate" counts as a hallucination.
        if annotation == "accurate":
            truth = "factual"
        else:
            truth = "hallucination"
            hallucinated_count += 1

        predictions.append((sentence, prediction))
        labels.append((sentence, truth))

    # A sample without sentences reports 0% instead of dividing by zero.
    n = len(sentences)
    predicted_ratio = flagged_count / n if n else 0.0
    truth_ratio = hallucinated_count / n if n else 0.0

    average_score_predicted = "{:.0%}".format(predicted_ratio)
    average_score_truth = "{:.0%}".format(truth_ratio)
    return average_score_predicted, predictions, labels, average_score_truth
def read_html_file(file_path):
    """Read an HTML file and return it as a compact, ASCII-only string.

    Non-ASCII characters are dropped, newlines are removed and whitespace
    between adjacent tags (``>   <``) is collapsed.

    Args:
        file_path: Path of the HTML file, read as UTF-8.

    Returns:
        The cleaned HTML string, or ``None`` when the file is missing or any
        other error occurs (the error is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()
        html_content = html_content.encode('ascii', 'ignore').decode('ascii')
        html_content = html_content.replace("\n", "")
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        html_content = re.sub(r">\s+<", "><", html_content)
        return html_content
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except Exception as e:
        # Best effort: the caller only needs "content or None".
        print(f"An error occurred: {str(e)}")
        return None
# HTML snippet embedded at the bottom of every tab; None when cookgpt.html
# cannot be read (read_html_file prints the error and returns None).
html_content = read_html_file("cookgpt.html")
# Dump the loaded snippet to stdout at startup.
print(html_content)
def createQuestions(documents):
    """Generate three questions from the supplied context text.

    Args:
        documents: Context text to generate questions from (the value of the
            "Context" text area when called from the UI).

    Returns:
        Whatever ``mqag_model.generate`` returns for three sampled questions.
    """
    # Use the caller's text, not the module-level default `document`, so that
    # edits made in the UI are honoured.
    questions = mqag_model.generate(context=documents, do_sample=True, num_questions=3)
    return questions
def detect(context, summary):
    """Score ``summary`` against ``context`` with the shared MQAG model.

    Args:
        context: Reference text.
        summary: Candidate text to score; also printed to stdout.

    Returns:
        The result of ``mqag_model.score`` using three questions.
    """
    print(summary)
    return mqag_model.score(
        candidate=summary,
        reference=context,
        num_questions=3,
        verbose=True,
    )
def summarize(document):
    """Ask the chat model for a second-grade-level summary of ``document``.

    Args:
        document: Text to summarize; sent as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_message = {
        "role": "system",
        "content": "Summarize content you are provided with for a second-grade student.",
    }
    user_message = {"role": "user", "content": document}

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        temperature=0.7,
        max_tokens=64,
        top_p=1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def detectRag(document, rags):
    """Ask the MQAG model which of the listed topics best matches ``document``.

    Args:
        document: Context text to classify.
        rags: Comma-separated topic names (the UI label asks for exactly 4).

    Returns:
        The first element of what ``mqag_model.answer`` returns for the single
        multiple-choice question — presumably the per-option probabilities,
        in the order of the sorted options (TODO confirm).
    """
    print(rags)
    # Strip surrounding whitespace so "a, b" gives "B" rather than " b"
    # (which would stay uncapitalized and sort ahead of every other option).
    options = sorted(word.strip().capitalize() for word in rags.split(","))
    print(options)
    questions = [{'question': "what is the main topic of this?", 'options': options}]
    probs = mqag_model.answer(questions=questions, context=document)
    print(probs[0])
    return probs[0]
# Three-tab Gradio UI.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost — confirm the Row/Column/Accordion layout.
with gr.Blocks() as demo:
    # Tab 1: generate questions, summarize, and score a summary against the context.
    with gr.Tab("Maya"):
        gr.Markdown(title)
        gr.Markdown(description)
        with gr.Row():
            with gr.Column():
                context = gr.TextArea(label="Context" , value=document)
                questions = gr.TextArea(label="Questions")
                createQuestiobBTN = gr.Button("Create Questions")
                createQuestiobBTN.click(createQuestions, inputs=context, outputs=questions)
        with gr.Row():
            with gr.Column():
                summaryTx = gr.TextArea(label="Summary" , value=summary)
                createSummaryBTN = gr.Button("Create Summary")
                createSummaryBTN.click(summarize, inputs=context, outputs=summaryTx)
                score = gr.TextArea(label="Score")
                detectHallucinate = gr.Button("Detect Hallucination")
                detectHallucinate.click(detect, inputs=[context,summaryTx], outputs=score)
        gr.HTML(html_content)
    # Tab 2: threshold-based sentence labelling of the preselected dataset samples.
    with gr.Tab("Mithya"):
        gr.Markdown(title)
        gr.Markdown(description2)
        with gr.Row():
            with gr.Column():
                # The continuation lines of the triple-quoted strings stay at
                # column 0: their leading whitespace is part of the text.
                examples_dropdown = gr.Dropdown(choices=list(examples.keys()), value=list(examples.keys())[0],
                                                interactive=True,
                                                label="Samples",
                                                info="""You can choose among high/low hallucinations examples from Wiki Bio.
More samples are available below.""")
                example_text = gr.TextArea(value=list(examples.values())[0][1])
                with gr.Accordion("Detection threshold", open=False):
                    treshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=THRESHOLD, label="Detection threshold", info="""The threshold used to detect hallucinations.
A sentence is flagged as hallucination when inconsistency (SelfCheckGPT NLI) score is above threshold.
Higher threshold increases precision (flagged hallucination actually being an hallucination) but reduces recall (percentage of hallucinations flagged).""")
                submit = gr.Button("Check hallucination", variant="primary")
            with gr.Column():
                label = gr.Label(label="Percentage of document flagged as hallucination")
                highlighted_prediction = gr.HighlightedText(
                    label="Hallucination detection",
                    combine_adjacent=True,
                    color_map={"hallucination": "red", "factual": "green"},
                    show_legend=True)
                with gr.Accordion("Ground truth", open=False):
                    gr.Markdown("Ground truth label manually annotated by humans. You can use that to compare the hallucination detection with the ground truth.")
                    label_ground_truth = gr.Label(label="Percentage of document actually hallucinations")
                    highlighted_ground_truth = gr.HighlightedText(
                        label="Ground truth",
                        combine_adjacent=True,
                        color_map={"hallucination": "red", "factual": "green"},
                        show_legend=True)
        # Show the text of the selected sample in the text area.
        examples_dropdown.input(mirror, inputs=examples_dropdown, outputs=example_text)
        # Output order matches evaluate()'s return tuple:
        # predicted %, predictions, ground-truth labels, ground-truth %.
        submit.click(evaluate, inputs=[examples_dropdown, treshold], outputs=[label, highlighted_prediction, highlighted_ground_truth, label_ground_truth])
        gr.HTML(html_content)
    # Tab 3: pick which of the comma-separated topics matches the context.
    with gr.Tab("Router-Chain-Branch"):
        gr.Markdown(titleRAG)
        gr.Markdown(description3)
        with gr.Row():
            with gr.Column():
                contextRAG = gr.TextArea(label="Context" , value=document)
                ragDocuments = gr.TextArea(label="Comma Seperated RAG (exactly 4)" , value="paneer,chicken,breakfast,dosa")
                findRAGDocument = gr.Button("Detect Document")
                rag = gr.TextArea(label="Rag Document to Look for")
                findRAGDocument.click(detectRag, inputs=[contextRAG,ragDocuments], outputs=rag)
        gr.HTML(html_content)
# Start serving the Gradio app defined above.
demo.launch()