"""Streamlit front end that queries a Vectara corpus and scores the results.

The app sends the user's query to the Vectara v1 query endpoint, shows the
generated summary, and runs the HHEM cross-encoder on each of the top result
texts paired with that summary.
"""

import json
import os

import numpy as np
import pandas as pd
import requests
import streamlit as st
from sentence_transformers import CrossEncoder

VECTARA_QUERY_URL = "https://api.vectara.io/v1/query"
# Upper bound (seconds) on how long one query may block the UI.
REQUEST_TIMEOUT_SECONDS = 30
# Number of results sent to the summarizer; the same number of result texts
# is scored against the summary so the table matches what was summarized.
MAX_SUMMARIZED_RESULTS = 5

# Streamlit re-executes this script on every widget interaction. Cache the
# model so it is constructed once per process instead of once per rerun.
# Fall back to a no-op decorator on Streamlit versions without cache_resource.
_cache_resource = getattr(st, "cache_resource", None) or (lambda func: func)


@_cache_resource
def _load_model():
    """Construct the HHEM cross-encoder."""
    return CrossEncoder('vectara/hallucination_evaluation_model')


# Initialize the HHEM model
model = _load_model()


def compute_hhem_scores(texts, summary):
    """Score each text against the summary with the HHEM model.

    Args:
        texts: Sequence of result texts.
        summary: Summary string paired with every text.

    Returns:
        One score per entry of ``texts``, as returned by ``model.predict``
        on the ``[text, summary]`` pairs. An empty array when ``texts`` is
        empty.
    """
    if not texts:
        # Nothing to score; avoid calling the model with an empty batch.
        return np.empty(0)
    pairs = [[text, summary] for text in texts]
    return model.predict(pairs)


def vectara_query(query: str, config: dict):
    """Run a query with summarization against the Vectara v1 API.

    Args:
        query: The query text.
        config: Must contain ``api_key``, ``customer_id`` and ``corpus_id``;
            may contain ``lambda_val`` (default 0.5) and ``top_k``
            (default 10).

    Returns:
        A ``(results, summary)`` tuple. ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the summary text. On any
        failure (network error, non-200 status, unparseable body, empty
        response set) an error is shown via ``st.error`` where applicable
        and ``([], "")`` is returned.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": MAX_SUMMARIZED_RESULTS,
            }],
        }]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            headers=headers,
            url=VECTARA_QUERY_URL,
            data=json.dumps(data),
            # Without a timeout a stalled connection would hang the app.
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        st.error(f"Query failed (request error: {exc})")
        return [], ""

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    try:
        result = response.json()
    except ValueError:
        st.error("Query failed (response body was not valid JSON)")
        return [], ""

    # The response set or the summary may be missing or empty; degrade to
    # "no results" / "no summary" instead of raising KeyError/IndexError.
    response_sets = result.get("responseSet") or []
    if not response_sets:
        return [], ""
    first_set = response_sets[0]
    responses = first_set.get("response") or []
    summaries = first_set.get("summary") or []
    summary = (summaries[0].get("text") or "") if summaries else ""

    res = [[r['text'], r['score']] for r in responses]
    return res, summary


# Streamlit UI setup
st.title("Vectara Content Query Interface")

# User inputs
query = st.text_input("Enter your query here", "")
lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

if st.button("Query Vectara"):
    if not query.strip():
        # Do not spend an API call on an empty query.
        st.warning("Please enter a query.")
    else:
        config = {
            "api_key": os.environ.get("VECTARA_API_KEY", ""),
            "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
            "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
            "lambda_val": lambda_val,
            "top_k": top_k,
        }

        results, summary = vectara_query(query, config)

        if results:
            st.subheader("Summary")
            st.write(summary)

            st.subheader("Top Results")

            # Extract texts from the results that were fed to the summarizer
            texts = [r[0] for r in results[:MAX_SUMMARIZED_RESULTS]]

            if summary:
                # Compute HHEM scores
                scores = compute_hhem_scores(texts, summary)
                # Prepare and display the dataframe
                df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            else:
                # No summary to score against; show the facts alone.
                st.info("No summary was returned, so HHEM scores were not computed.")
                df = pd.DataFrame({'Fact': texts})

            st.dataframe(df)
        else:
            st.write("No results found.")