import gradio as gr
import pandas as pd
import evaluate
import theme
import re




title = "<h1 style='text-align: center; color: #333333; font-size: 40px;'> Maya, Mithya and AI Hallucinations </h1>"

description = "1) Provide your context (any topic)<br/> 2) Create questions<br/> 3) Summarize<br/> 4) Detect hallucination (the difference between the summary and the context)."
description2 = "Detect hallucination using NLI."
titleRAG = "<h1 style='text-align: center; color: #333333; font-size: 40px;'> Routing, Chaining and Branching of RAG Documents </h1>"
description3 = "This experimental approach creates a router across multiple RAG documents, chains them together, and incorporates branching for greater flexibility and adaptability."



import numpy as np
import ast
from openai import OpenAI 
import os
import torch
import spacy
from selfcheckgpt.modeling_selfcheck import SelfCheckMQAG, SelfCheckBERTScore
import en_core_web_sm
nlp = en_core_web_sm.load()


os.environ["OPENAI_API_KEY"]=os.environ['API_TOKEN']
client = OpenAI()
torch.manual_seed(28)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

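# Instantiate the SelfCheckGPT scorers (MQAG question-answering consistency and BERTScore).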
selfcheck_mqag = SelfCheckMQAG(device=device)
selfcheck_bertscore = SelfCheckBERTScore()
#nlp = spacy.load("en_core_web_sm")

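# MQAG generates multiple-choice questions from a context and answers them;
# it is used below for question creation, summary scoring, and RAG routing.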
from selfcheckgpt.modeling_mqag import MQAG
mqag_model = MQAG(
    g1_model_type='race', # race (more abstractive), squad (more extractive)
    device=device
)

document = r"""This is the complete recipe for cooking paneer butter masala. Heat 1 teaspoon of oil in a pan on medium heat. Once the oil is hot, add the bay leaf, cinnamon stick, cloves and saute for a few seconds. Then add the onion, garlic, ginger and saute for 2 to 3 minutes until the onion is translucent. Add the tomatoes and cashews and mix. Then add 1 cup of water. Cover the pan and cook on medium heat for 15 minutes. After 15 minutes, remove the pan from heat. Remove the bay leaf, cinnamon stick and cloves. Let the mixture cool down a bit and then transfer to a blender. It’s important to let it cool down a bit, else it will all blow up from the mixer. Grind the masala to a smooth paste and set aside. To the same pan now add 2 tablespoons butter along with the remaining 1 teaspoon oil on medium heat. Once the butter melts, add the red chili powder and the Kashmiri red chili powder and fry for a few seconds. This will give the curry a nice red color. Then add the ground paste back into the pan along with the garam masala (start with 1/2 teaspoon and add the remaining 1/4 teaspoon at the end only if you feel like the curry needs that extra bit of garam masala), cardamom powder, sugar, salt and tomato paste (if using). Mix well and cook for 1-2 minutes. Then add the cream and mix. Add in the paneer and cook for 2 to 3 minutes on medium heat. Finally add crushed kasuri methi. Garnish paneer butter masala with cilantro and serve hot with naan or rice!""".replace("\n", "")
summary = "Heat oil in a pan, sauté spices, onions, garlic, ginger, then tomatoes and cashews, cook with water, blend into a paste, melt butter, add chili powders, return paste to pan with spices, sugar, salt, tomato paste, cook, add cream, paneer, kasuri methi, garnish with cilantro, serve with naan or rice."
df = pd.read_csv("./wiki_bio_gpt3_hallucination.csv")

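# Each row stores per-sentence SelfCheckGPT NLI scores as a stringified list;
# parse it and average the scores to get one hallucination score per document.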
def compute_score_per_document(scores):
    scores = ast.literal_eval(scores)
    scores = np.array(scores)
    return scores.mean()

df["average_score"] = df["sent_scores_nli"].apply(compute_score_per_document)
sorted_df = df.sort_values(by=['average_score'], ascending=False)

THRESHOLD = 0.5

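# Build the dropdown samples: the three highest- and three lowest-scoring passages
# (most and least hallucinated, respectively).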
examples = {}
for i in range(3):
    sample = sorted_df.iloc[[i]]
    examples[f"High hallucination sample {i+1}"] = (sample.index[0] , sample["gpt3_text"].values[0])
    sample = sorted_df.iloc[[-(i+1)]]
    examples[f"Low hallucination sample {i+1}"] = (sample.index[0] , sample["gpt3_text"].values[0])

def mirror(example):
    return examples[example][1]

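# Flag each sentence as "hallucination" or "factual" by comparing its NLI score to the
# threshold, and report the flagged fraction alongside the human ground-truth annotations.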
def evaluate(example, threshold):
    index = examples[example][0]
    row = sorted_df.loc[index]
    
    scores = ast.literal_eval(row["sent_scores_nli"])
    sentences = ast.literal_eval(row["gpt3_sentences"])
    annotations = ast.literal_eval(row["annotation"])
    predictions = []
    labels = []

    n = len(sentences)
    average_score_predicted = 0.0
    average_score_truth = 0.0

    for score, sentence, annotation in zip(scores, sentences, annotations):
        if score > threshold:
            prediction = "hallucination"
            average_score_predicted += 1.0
        else:
            prediction = "factual"
        if annotation == "accurate":
            annotation = "factual"
        else:
            annotation = "hallucination"
            average_score_truth += 1.0
        
        predictions.append((sentence, prediction))
        labels.append((sentence, annotation))
    average_score_predicted /= n
    average_score_predicted = "{:.0%}".format(average_score_predicted)

    average_score_truth /= n
    average_score_truth = "{:.0%}".format(average_score_truth)
    return average_score_predicted, predictions, labels, average_score_truth


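# Read cookgpt.html and collapse whitespace between tags so it can be embedded with gr.HTML in each tab.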
def read_html_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()
            html_content = html_content.encode('ascii', 'ignore').decode('ascii')
            html_content = html_content.replace("\n", "")
            html_content = re.sub(r">\s+<", "><", html_content)
        return html_content
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
html_content = read_html_file("cookgpt.html")
print(html_content)
def createQuestions(documents):
    # Use the text passed in from the UI, not the module-level `document`.
    questions = mqag_model.generate(context=documents, do_sample=True, num_questions=3)
    return questions
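# Score the summary against the context with MQAG (answer consistency between the two
# texts); higher divergence suggests hallucination.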
def detect(context, summary):
    print(summary)
    score = mqag_model.score(candidate=summary, reference=context, num_questions=3, verbose=True)
    return score
    
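# Summarize the document with gpt-3.5-turbo (second-grade reading level, max 64 tokens).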
def summarize(document):
  response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
      {
        "role": "system",
        "content": "Summarize content you are provided with for a second-grade student."
      },
      {
        "role": "user",
        "content":document
      }
    ],
    temperature=0.7,
    max_tokens=64,
    top_p=1
  )
  return response.choices[0].message.content
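# Route to a RAG document: ask MQAG a single multiple-choice question whose options are
# the candidate document names, and return the probability assigned to each option.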
def detectRag(document,rags):
  print(rags)  
  options = sorted([word.capitalize() for word in rags.split(",")])
  print(options)
  questions = [{'question': "what is the main topic of this?", 'options': options}]
  probs = mqag_model.answer(questions=questions, context=document)
  print(probs[0])
  return probs[0]

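# UI: three tabs - "Maya" (question generation + MQAG hallucination score),
# "Mithya" (SelfCheckGPT NLI demo on WikiBio samples), and "Router-Chain-Branch" (RAG routing).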
with gr.Blocks() as demo:
    with gr.Tab("Maya"):
        gr.Markdown(title)
        gr.Markdown(description)        
        with gr.Row():
            with gr.Column():
              context = gr.TextArea(label="Context" , value=document)                          
              questions = gr.TextArea(label="Questions")
              createQuestionBTN = gr.Button("Create Questions")
              createQuestionBTN.click(createQuestions, inputs=context, outputs=questions)
             
        with gr.Row():
            with gr.Column():
              summaryTx = gr.TextArea(label="Summary" , value=summary)  
              createSummaryBTN = gr.Button("Create Summary")
              createSummaryBTN.click(summarize, inputs=context, outputs=summaryTx)
              score = gr.TextArea(label="Score")
              detectHallucinate = gr.Button("Detect Hallucination")  
              detectHallucinate.click(detect, inputs=[context,summaryTx], outputs=score)   
        gr.HTML(html_content)
    with gr.Tab("Mithya"):  
        gr.Markdown(title)
        gr.Markdown(description2)
        with gr.Row():
            with gr.Column():
                examples_dropdown = gr.Dropdown(choices=list(examples.keys()), value=list(examples.keys())[0],
                                        interactive=True,
                                        label="Samples",
                                        info="""You can choose among high/low hallucination samples from WikiBio.
                                        More samples are available below.""")
                example_text = gr.TextArea(value=list(examples.values())[0][1])
                with gr.Accordion("Detection threshold", open=False):
                    threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=THRESHOLD, label="Detection threshold", info="""The threshold used to detect hallucinations.
                                        A sentence is flagged as a hallucination when its inconsistency (SelfCheckGPT NLI) score is above the threshold.
                                        A higher threshold increases precision (a flagged sentence actually being a hallucination) but reduces recall (the percentage of hallucinations that are flagged).""")
                submit = gr.Button("Check hallucination", variant="primary")
            with gr.Column():
                label = gr.Label(label="Percentage of document flagged as hallucination")
                highlighted_prediction = gr.HighlightedText(
                                        label="Hallucination detection",
                                        combine_adjacent=True,
                                        color_map={"hallucination": "red", "factual": "green"},
                                        show_legend=True)
                with gr.Accordion("Ground truth", open=False):
                    gr.Markdown("Ground-truth labels manually annotated by humans. Use them to compare the hallucination detection against the ground truth.")
                    label_ground_truth = gr.Label(label="Percentage of the document that is actually hallucination")
                    highlighted_ground_truth = gr.HighlightedText(
                                            label="Ground truth",
                                            combine_adjacent=True,
                                            color_map={"hallucination": "red", "factual": "green"},
                                            show_legend=True)
        examples_dropdown.input(mirror, inputs=examples_dropdown, outputs=example_text)
        submit.click(evaluate, inputs=[examples_dropdown, threshold], outputs=[label, highlighted_prediction, highlighted_ground_truth, label_ground_truth])
        gr.HTML(html_content)
    with gr.Tab("Router-Chain-Branch"):  
        gr.Markdown(titleRAG)
        gr.Markdown(description3)
        with gr.Row():
            with gr.Column(): 
              contextRAG = gr.TextArea(label="Context" , value=document)
              ragDocuments = gr.TextArea(label="Comma-separated RAG documents (exactly 4)", value="paneer,chicken,breakfast,dosa")
              findRAGDocument = gr.Button("Detect Document") 
              rag = gr.TextArea(label="RAG document to look for")
              findRAGDocument.click(detectRag, inputs=[contextRAG,ragDocuments], outputs=rag)  
        gr.HTML(html_content)         
demo.launch()