# hhem / app.py -- eaglelandsonce
# Commit 2cd5e2c ("Update app.py", verified), 12.2 kB
import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re
from textwrap import dedent
import google.generativeai as genai
# Tool import
from crewai.tools.gemini_tools import GeminiSearchTools
from crewai.tools.mixtral_tools import MixtralSearchTools
from crewai.tools.zephyr_tools import ZephyrSearchTools
from crewai.tools.phi2_tools import Phi2SearchTools
# Google Langchain
from langchain_google_genai import GoogleGenerativeAI
#Crew imports
from crewai import Agent, Task, Crew, Process
# Retrieve the Google API key from the GOOGLE_API_KEY environment variable
GOOGLE_AI_STUDIO = os.environ.get('GOOGLE_API_KEY')
# Fail fast at start-up when the key is missing or empty; the message names
# the variable that is actually read above.
if not GOOGLE_AI_STUDIO:
    raise ValueError("API key not found. Please set the GOOGLE_API_KEY environment variable.")
# LLM handle passed as `llm` to every CrewAI agent defined in this file
gemini_llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_AI_STUDIO)
# CrewAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def crewai_process_gemini(research_topic):
    """Run a one-agent, one-task crew that summarizes ``research_topic``.

    The agent is driven by the module-level ``gemini_llm`` and is given the
    ``GeminiSearchTools.gemini_search`` tool.

    Args:
        research_topic: Text interpolated into the task description
            ``"Summarize {research_topic}"``.

    Returns:
        Whatever ``crew.kickoff()`` returns.
    """
    # Define the agent. The goal matches the one used by the other
    # crewai_process_* agents in this file so the processes run with the
    # same role/goal/backstory and differ only in their tool.
    gemini_agent = Agent(
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="""Skilled in running query evaluation""",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[
            GeminiSearchTools.gemini_search,
        ],
    )
    # Single task for the agent
    task1 = Task(
        description=f"""Summarize {research_topic}""",
        agent=gemini_agent,
    )
    # Instantiate the crew with a sequential process
    crew = Crew(
        agents=[gemini_agent],
        tasks=[task1],
        verbose=2,
        process=Process.sequential,
    )
    # Run the crew and hand its result back to the caller
    result = crew.kickoff()
    return result
def crewai_process_mixtral_crazy(research_topic):
    """Summarize ``research_topic`` with a crew using the ``mixtral_crazy`` tool.

    Builds one agent (LLM: module-level ``gemini_llm``; tool:
    ``MixtralSearchTools.mixtral_crazy``), one task whose description is
    ``"Summarize {research_topic}"``, and a sequential crew around them.

    Returns:
        The value returned by ``crew.kickoff()``.
    """
    evaluator = Agent(
        llm=gemini_llm,
        tools=[MixtralSearchTools.mixtral_crazy],
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        allow_delegation=False,
        verbose=True,
    )
    summarize_task = Task(
        agent=evaluator,
        description=f"Summarize {research_topic}",
    )
    # One agent, one task, executed sequentially.
    crew = Crew(
        process=Process.sequential,
        verbose=2,
        tasks=[summarize_task],
        agents=[evaluator],
    )
    return crew.kickoff()
def crewai_process_mixtral_normal(research_topic):
    """Summarize ``research_topic`` with a crew using the ``mixtral_normal`` tool.

    Builds one agent (LLM: module-level ``gemini_llm``; tool:
    ``MixtralSearchTools.mixtral_normal``), one task whose description is
    ``"Summarize {research_topic}"``, and a sequential crew around them.

    Returns:
        The value returned by ``crew.kickoff()``.
    """
    evaluator = Agent(
        llm=gemini_llm,
        tools=[MixtralSearchTools.mixtral_normal],
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        allow_delegation=False,
        verbose=True,
    )
    summarize_task = Task(
        agent=evaluator,
        description=f"Summarize {research_topic}",
    )
    # One agent, one task, executed sequentially.
    crew = Crew(
        process=Process.sequential,
        verbose=2,
        tasks=[summarize_task],
        agents=[evaluator],
    )
    return crew.kickoff()
def crewai_process_zephyr_normal(research_topic):
    """Summarize ``research_topic`` with a crew using the ``zephyr_normal`` tool.

    Builds one agent (LLM: module-level ``gemini_llm``; tool:
    ``ZephyrSearchTools.zephyr_normal``), one task whose description is
    ``"Summarize {research_topic}"``, and a sequential crew around them.

    Returns:
        The value returned by ``crew.kickoff()``.
    """
    evaluator = Agent(
        llm=gemini_llm,
        tools=[ZephyrSearchTools.zephyr_normal],
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="Skilled in running query evaluation",
        allow_delegation=False,
        verbose=True,
    )
    summarize_task = Task(
        agent=evaluator,
        description=f"Summarize {research_topic}",
    )
    # One agent, one task, executed sequentially.
    crew = Crew(
        process=Process.sequential,
        verbose=2,
        tasks=[summarize_task],
        agents=[evaluator],
    )
    return crew.kickoff()
def crewai_process_phi2(research_topic):
    """Run a one-agent, one-task crew that summarizes ``research_topic``.

    The agent is driven by the module-level ``gemini_llm`` and is given the
    ``Phi2SearchTools.phi2_search`` tool.

    Args:
        research_topic: Text interpolated into the task description
            ``"Summarize {research_topic}"``.

    Returns:
        Whatever ``crew.kickoff()`` returns.
    """
    # Define the agent. The role matches the one used by the other
    # crewai_process_* agents in this file so the processes run with the
    # same role/goal/backstory and differ only in their tool.
    phi2_agent = Agent(
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        backstory="""Skilled in running query evaluation""",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[
            Phi2SearchTools.phi2_search,
        ],
    )
    # Single task for the agent
    task1 = Task(
        description=f"""Summarize {research_topic}""",
        agent=phi2_agent,
    )
    # Instantiate the crew with a sequential process
    crew = Crew(
        agents=[phi2_agent],
        tasks=[task1],
        verbose=2,
        process=Process.sequential,
    )
    # Run the crew and hand its result back to the caller
    result = crew.kickoff()
    return result
# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# All three Vectara settings are required: indexing os.environ directly means
# a missing variable raises KeyError while the script is loading.
corpus_id, customer_id, api_key = (
    os.environ[variable]
    for variable in (
        'VECTARA_CORPUS_ID',
        'VECTARA_CUSTOMER_ID',
        'VECTARA_API_KEY',
    )
)
# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def get_post_headers() -> dict:
    """Build the header dict to attach to each POST request.

    Reads the module-level ``api_key`` and ``customer_id`` values and
    declares a JSON body.
    """
    headers = {}
    headers["x-api-key"] = api_key
    headers["customer-id"] = customer_id
    headers["Content-Type"] = "application/json"
    return headers
def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
    """Query the Vectara corpus and return the generated summary text.

    Posts a single query against the module-level ``customer_id`` /
    ``corpus_id`` asking for 10 results (two sentences of context on either
    side) and a summary over at most 5 of them.

    Args:
        query: The query text.
        filter_str: When non-empty, sent as the corpus ``metadataFilter``.
        lambda_val: Sent as the ``lexicalInterpolationConfig`` lambda.

    Returns:
        The summary text with bracketed numeric citation markers removed.
        Returns ``""`` when the HTTP status is not 200 (after reporting it
        with ``st.error``) or when the response carries no summary.
    """
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
    }
    if filter_str:
        corpus_key["metadataFilter"] = filter_str
    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": 10,
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                },
                "corpusKey": [corpus_key],
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0",
                    },
                ],
            }
        ]
    }
    response = requests.post(
        "https://api.vectara.io/v1/query",
        headers=get_post_headers(),
        data=json.dumps(data),
        timeout=130,
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return ""
    result = response.json()
    # A 200 response is not guaranteed to contain a summary; treat a missing
    # or empty one like the failure path instead of raising IndexError/KeyError.
    response_sets = result.get("responseSet") or []
    summaries = (response_sets[0].get("summary") or []) if response_sets else []
    if not summaries:
        return ""
    answer = summaries[0].get("text") or ""
    # Strip citation markers such as "[1]" or "[1,2]" (one number followed by
    # up to five more comma-separated numbers).
    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)
# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
@st.cache_resource
def _load_hhem_model():
    """Create the HHEM cross-encoder once and reuse it.

    Streamlit re-executes this script on user interaction; caching the
    loaded model as a resource avoids re-instantiating it on every rerun.
    """
    return CrossEncoder('vectara/hallucination_evaluation_model')


# Module-level handle used by compute_hhem_scores
model = _load_hhem_model()
# Function to compute HHEM scores
def compute_hhem_scores(texts, summary):
    """Score every text in ``texts`` against ``summary`` with the HHEM model.

    Each text is paired with the same summary as ``[text, summary]`` and the
    list of pairs is passed to the module-level ``model.predict``.

    Returns:
        Whatever ``model.predict`` returns for the list of pairs.
    """
    text_summary_pairs = []
    for candidate in texts:
        text_summary_pairs.append([candidate, summary])
    return model.predict(text_summary_pairs)
# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Query Vectara and return the matched passages plus the summary.

    Args:
        query: The query text.
        config: Settings for the call. ``"customer_id"``, ``"corpus_id"`` and
            ``"api_key"`` are required; ``"lambda_val"`` (default 0.5) and
            ``"top_k"`` (default 10) are optional.

    Returns:
        A ``(results, summary)`` tuple where ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the summary text.
        Returns ``([], "")`` when the HTTP status is not 200 (after reporting
        it with ``st.error``) or when the response contains no result set.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }],
        }]
    }
    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    response = requests.post(
        headers=headers,
        url="https://api.vectara.io/v1/query",
        data=json.dumps(data),
        # Without a timeout requests can wait indefinitely; use the same
        # limit as query_vectara in this file.
        timeout=130,
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""
    result = response.json()
    # A 200 response is not guaranteed to carry results or a summary; fall
    # back to empty values instead of raising IndexError/KeyError.
    response_sets = result.get("responseSet") or []
    if not response_sets:
        return [], ""
    first_set = response_sets[0]
    responses = first_set.get("response") or []
    summaries = first_set.get("summary") or []
    summary = (summaries[0].get("text") or "") if summaries else ""
    res = [[r['text'], r['score']] for r in responses]
    return res, summary
# Create the main app with four tabs
tab_titles = [
    "Synthetic Data",
    "Data Query",
    "HHEM-Victara Query Tuner",
    "Model Evaluation",
]
tab1, tab2, tab3, tab4 = st.tabs(tab_titles)

# Tab 1: a header and a link button to an external synthetic-data page
with tab1:
    st.header("Synthetic Data")
    st.link_button(
        label="Create Synthetic Medical Data",
        url="https://chat.openai.com/g/g-XyHciw52w-synthetic-clinical-data",
    )

# Tab 2: a header and a link button to an external query/summarize page
with tab2:
    st.header("Data Query")
    st.link_button(
        label="Query & Summarize Data",
        url="https://chat.openai.com/g/g-9tWqg4gRY-explore-summarize-medical-data",
    )
with tab3:
    st.header("HHEM-Victara Query Tuner")
    # Inputs: the query text plus the two tunable settings passed to
    # vectara_query through `config`.
    query = st.text_area(label="Enter your text for query tuning", value="", height=75)
    lambda_val = st.slider(label="Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
    top_k = st.number_input(label="Top K Results", min_value=1, max_value=50, value=10)
    if st.button("Query Vectara"):
        # Credentials default to "" here when the variable is unset.
        config = dict(
            api_key=os.environ.get("VECTARA_API_KEY", ""),
            customer_id=os.environ.get("VECTARA_CUSTOMER_ID", ""),
            corpus_id=os.environ.get("VECTARA_CORPUS_ID", ""),
            lambda_val=lambda_val,
            top_k=top_k,
        )
        results, summary = vectara_query(query, config)
        if not results:
            st.write("No results found.")
        else:
            st.subheader("Summary")
            st.write(summary)
            st.subheader("Top Results")
            # Only the text of the first five results is scored.
            texts = []
            for row in results[:5]:
                texts.append(row[0])
            # Score each text against the summary with the HHEM model
            scores = compute_hhem_scores(texts, summary)
            # Show texts and scores side by side
            st.dataframe(pd.DataFrame({'Fact': texts, 'HHEM Score': scores}))
with tab4:
    st.header("Model Evaluation")
    # Topic handed to whichever crew function is selected
    research_topic = st.text_input('Enter your research topic:', '')
    # Selectable label -> function to run; insertion order is the order
    # shown in the select box.
    process_runners = {
        'crewai_process_gemini': crewai_process_gemini,
        'crewai_process_mixtral_crazy': crewai_process_mixtral_crazy,
        'crewai_process_mixtral_normal': crewai_process_mixtral_normal,
        'crewai_process_zephyr_normal': crewai_process_zephyr_normal,
        'crewai_process_phi2': crewai_process_phi2,
    }
    process_selection = st.selectbox(
        'Choose the process to run:',
        tuple(process_runners),
    )
    # Run the chosen function on click, but only when a topic was entered
    if st.button('Run Process'):
        if not research_topic:
            st.warning('Please enter a research topic.')
        else:
            result = process_runners[process_selection](research_topic)
            st.write(result)