# LLM Hackathon: LLM Hallucination Detector

from selfcheckgpt.modeling_selfcheck import SelfCheckNLI, SelfCheckBERTScore, SelfCheckNgram
import torch
import spacy
import os
import gradio as gr


# spaCy English pipeline; split_sent() uses it to segment text into sentences.
nlp = spacy.load("en_core_web_sm")

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The three SelfCheckGPT scorers offered by the UI buttons.
selfcheck_nli = SelfCheckNLI(device=device)
selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
selfcheck_ngram = SelfCheckNgram(n=1)  # n=1 is a unigram model, n=2 a bigram model, etc.

import os
from openai import AzureOpenAI

# Azure OpenAI connection settings, read from environment variables.
openai_key = os.getenv("OPENAI_API_KEY")
api_version = os.getenv("OPENAI_API_VERSION")
api_url = os.getenv("OPENAI_API_RESOURCEURL")
# Same environment variable as api_url; the name is kept so that existing
# references to resource_url keep working.
resource_url = api_url

# One shared client for every request made in this file (it used to be
# constructed twice with identical arguments).
client = AzureOpenAI(
    api_key=openai_key,
    api_version=api_version,
    azure_endpoint=api_url,
)

# Custom name chosen for the model deployment in Azure. The request helpers
# in this file call client.chat.completions, so this presumably has to be a
# chat-capable deployment -- NOTE(review): confirm the deployed model.
deployment_name = os.getenv("model_name")

def generate_response(prompt: str, temperature: float = 0.0) -> str:
    """Send *prompt* as a single user message and return the model's reply.

    Args:
        prompt: Text sent as the only (user) message of the chat request.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0.0, the value this function previously hard-coded, so existing
            callers are unaffected.

    Returns:
        The text of the first returned choice, or an empty string when that
        choice carries no text content.
    """
    response = client.chat.completions.create(
        model=deployment_name,  # the Azure deployment name
        temperature=temperature,
        messages=[{"role": "user", "content": prompt}],
    )
    # The SDK types message.content as optional; normalise a missing value to
    # "" so callers always receive a string.
    return response.choices[0].message.content or ""

def generate_response_high_temp(prompt):
    """Return the model's reply to *prompt*, sampled at temperature 1.0.

    The prompt is sent as a single user message to the deployment named by
    ``deployment_name``; the content of the first returned choice is handed
    back unchanged.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=deployment_name,
        temperature=1.0,
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def create_dataset(prompt):
    """Request three separate high-temperature replies to *prompt*.

    Returns:
        A 3-tuple with the replies in the order they were requested.
    """
    # One call per sample, issued one after another.
    return tuple(generate_response_high_temp(prompt) for _ in range(3))

def split_sent(sentence):
    """Segment *sentence* with the spaCy pipeline and return its sentences.

    Returns:
        A list with the text of each detected sentence, stripped of leading
        and trailing whitespace.
    """
    doc = nlp(sentence)
    pieces = []
    for span in doc.sents:
        pieces.append(span.text.strip())
    return pieces

def func_selfcheck_nli(sentence, s1, s2, s3):
    """Score the response against three samples with the SelfCheck NLI scorer.

    Args:
        sentence: Contents of the "response" textbox. generating_samples()
            hands the response over as a one-element list, so the text is
            expected to arrive as that list's string form, e.g. "['...']"
            (NOTE(review): this depends on how Gradio stringifies the value;
            confirm). Plain text is accepted as well.
        s1, s2, s3: The three sampled passages to compare against.

    Returns:
        A message reporting the score; scores above 0.35 are reported as
        hallucination. If there is no response text, a short notice is
        returned instead of calling the scorer.
    """
    import ast  # function-scope import keeps this block self-contained

    text = (sentence or "").strip()
    # Recover the original string from the list form. Parsing (rather than
    # slicing off two characters per side) also undoes repr escapes such as
    # "\\n" and "\\'" so the scorer sees the real text.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, list) and len(parsed) == 1 and isinstance(parsed[0], str):
        text = parsed[0].strip()
    elif text[:2] in ("['", '["') and text[-2:] in ("']", '"]'):
        # Looks like the list form but no longer parses (e.g. edited by hand):
        # fall back to trimming the two wrapper characters at each end.
        text = text[2:-2].strip()
    if not text:
        return "No response to check; generate a response first."

    # The whole response is passed as a single entry of `sentences`, so the
    # comparison below acts on the one score returned for it.
    score = selfcheck_nli.predict(
        sentences=[text],
        sampled_passages=[s1, s2, s3],
    )

    if score > 0.35:
        return f"The LLM is hallucinating with selfcheck nli score of {score}"
    return f"The LLM is generating true information with selfcheck nli score of {score}"

def func_selfcheckbert(sentence, s1, s2, s3):
    """Score the response against three samples with the SelfCheck BERTScore scorer.

    Args:
        sentence: Contents of the "response" textbox. generating_samples()
            hands the response over as a one-element list, so the text is
            expected to arrive as that list's string form, e.g. "['...']"
            (NOTE(review): this depends on how Gradio stringifies the value;
            confirm). Plain text is accepted as well.
        s1, s2, s3: The three sampled passages to compare against.

    Returns:
        A message reporting the score; scores above 0.6 are reported as
        hallucination. If there is no response text, a short notice is
        returned instead of calling the scorer.
    """
    import ast  # function-scope import keeps this block self-contained

    text = (sentence or "").strip()
    # Recover the original string from the list form. Parsing (rather than
    # slicing off two characters per side) also undoes repr escapes such as
    # "\\n" and "\\'" so the scorer sees the real text.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, list) and len(parsed) == 1 and isinstance(parsed[0], str):
        text = parsed[0].strip()
    elif text[:2] in ("['", '["') and text[-2:] in ("']", '"]'):
        # Looks like the list form but no longer parses (e.g. edited by hand):
        # fall back to trimming the two wrapper characters at each end.
        text = text[2:-2].strip()
    if not text:
        return "No response to check; generate a response first."

    # The whole response is passed as a single entry of `sentences`, so the
    # comparison below acts on the one score returned for it.
    sent_scores_bertscore = selfcheck_bertscore.predict(
        sentences=[text],
        sampled_passages=[s1, s2, s3],
    )

    if sent_scores_bertscore > 0.6:
        return f"The LLM is hallucinating with selfcheck BERT score of {sent_scores_bertscore}"
    return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"

def func_selfcheckngram(sentence, s1, s2, s3):
    """Score the response against three samples with the SelfCheck n-gram scorer.

    Args:
        sentence: Contents of the "response" textbox. generating_samples()
            hands the response over as a one-element list, so the text is
            expected to arrive as that list's string form, e.g. "['...']"
            (NOTE(review): this depends on how Gradio stringifies the value;
            confirm). Plain text is accepted as well.
        s1, s2, s3: The three sampled passages to compare against.

    Returns:
        A message reporting the document-level ``avg_max_neg_logprob``;
        values above 6 are reported as hallucination. If there is no
        response text, a short notice is returned instead of calling the
        scorer.
    """
    import ast  # function-scope import keeps this block self-contained

    text = (sentence or "").strip()
    # Recover the original string from the list form. Parsing (rather than
    # slicing off two characters per side) also undoes repr escapes such as
    # "\\n" and "\\'" so the scorer sees the real text.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, list) and len(parsed) == 1 and isinstance(parsed[0], str):
        text = parsed[0].strip()
    elif text[:2] in ("['", '["') and text[-2:] in ("']", '"]'):
        # Looks like the list form but no longer parses (e.g. edited by hand):
        # fall back to trimming the two wrapper characters at each end.
        text = text[2:-2].strip()
    if not text:
        return "No response to check; generate a response first."

    # This scorer takes the response both split into sentences and whole.
    ngram_scores = selfcheck_ngram.predict(
        sentences=split_sent(text),
        passage=text,
        sampled_passages=[s1, s2, s3],
    )

    avg_max_neg_logprob = ngram_scores['doc_level']['avg_max_neg_logprob']
    if avg_max_neg_logprob > 6:
        return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
    return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"

def generating_samples(prompt):
    """Produce the response to check plus three comparison samples for a topic.

    The topic in *prompt* is embedded in a fixed instruction; that instruction
    is sent once through generate_response() and then through
    create_dataset() for the samples.

    Returns:
        A 4-tuple ``(sentence, s1, s2, s3)`` where ``sentence`` is a
        one-element list holding the generate_response() reply and
        ``s1``..``s3`` are the sampled replies.
    """
    wiki_prompt = f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"

    reference = generate_response(wiki_prompt)
    sample_a, sample_b, sample_c = create_dataset(wiki_prompt)

    # The reference stays wrapped in a list: the scoring callbacks in this
    # file strip that list wrapper from the textbox value.
    return [reference], sample_a, sample_b, sample_c
# Gradio UI: a prompt box, the generated response, three sampled passages,
# an output box, and one button per SelfCheckGPT scorer.
with gr.Blocks() as demo:
    # Heading; the closing tag was previously written as a second "<h1>".
    gr.Markdown("<h1>LLM Hackathon : LLM Hallucination Detector</h1>")

    with gr.Column():
        prompt = gr.Textbox(label="prompt")

    with gr.Column():
        sentence = gr.Textbox(label="response")

    with gr.Row():
        s1 = gr.Textbox(label="sample1")
        s2 = gr.Textbox(label="sample2")
        s3 = gr.Textbox(label="sample3")

    with gr.Column():
        score = gr.Textbox(label="output")

    # Fills the response box and the three sample boxes from the prompt.
    output_response = gr.Button("Generate response")
    output_response.click(
        fn=generating_samples,
        inputs=prompt,
        outputs=[sentence, s1, s2, s3],
    )

    # Each scorer button reads the same four boxes and writes to "output".
    with gr.Row(equal_height=True):
        self_check_nli_button = gr.Button("self check nli")
        self_check_nli_button.click(
            fn=func_selfcheck_nli,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

        selfcheckbert_button = gr.Button("self check Bert")
        selfcheckbert_button.click(
            fn=func_selfcheckbert,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

        self_check_ngram_button = gr.Button("self check ngram")
        self_check_ngram_button.click(
            fn=func_selfcheckngram,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

demo.launch()