Spaces:

eaglelandsonce
/

hhem

Runtime error

File size: 13,032 Bytes

import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re

from textwrap import dedent
import google.generativeai as genai


# Tool import
from crewai.tools.gemini_tools import GeminiSearchTools
from crewai.tools.mixtral_tools import MixtralSearchTools
from crewai.tools.zephyr_tools import ZephyrSearchTools
from crewai.tools.phi2_tools import Phi2SearchTools


# Google Langchain
from langchain_google_genai import GoogleGenerativeAI

#Crew imports
from crewai import Agent, Task, Crew, Process

# Read the Google API key from the GOOGLE_API_KEY environment variable.
GOOGLE_AI_STUDIO = os.environ.get('GOOGLE_API_KEY')

# Fail fast when the key is missing or empty. The message names the variable
# that is actually read above so the fix is obvious to whoever deploys the app.
if not GOOGLE_AI_STUDIO:
    raise ValueError("API key not found. Please set the GOOGLE_API_KEY environment variable.")

# LLM handle ("gemini-pro") that every CrewAI agent below receives as `llm`.
gemini_llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_AI_STUDIO)

# CrewAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def crewai_process_gemini(research_topic):
    """Run a one-agent, one-task crew whose agent carries the Gemini search tool.

    Args:
        research_topic: Accepted by the signature but never read in the body.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the single introduction task.
    """
    # Persona text handed to the agent.
    persona_role = 'Emily Mental Patient Graphic Designer Anxiety'
    persona_goal = 'To learn how to manage her anxiety in social situations through group therapy.'
    persona_backstory = """Emily is a 28-year-old graphic designer. She has always struggled with social anxiety, 
        making it difficult for her to participate in group settings. She joined the therapy group to improve 
        her social skills and manage her anxiety. You are able to discuss a variety of mental health issues."""

    # Prompt for the only task in the crew.
    intro_prompt = f"""Introduction yourself and describe your current mood and any significant events from the week affecting their mental state.
        """

    emily = Agent(
        role=persona_role,
        goal=persona_goal,
        backstory=persona_backstory,
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[GeminiSearchTools.gemini_search],
    )
    introduction = Task(description=intro_prompt, agent=emily)

    # One agent, one task, processed sequentially.
    return Crew(
        agents=[emily],
        tasks=[introduction],
        verbose=2,
        process=Process.sequential,
    ).kickoff()



def crewai_process_mixtral_crazy(research_topic):
    """Run a one-agent, one-task crew whose agent carries the ``mixtral_crazy`` tool.

    The agent's own LLM is ``gemini_llm``; the Mixtral function is attached
    only as a tool.

    Args:
        research_topic: Accepted by the signature but never read in the body.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the single introduction task.
    """
    # Persona text handed to the agent.
    persona_role = 'Emily Mental Patient Graphic Designer Anxiety'
    persona_goal = 'To learn how to manage her anxiety in social situations through group therapy.'
    persona_backstory = """Emily is a 28-year-old graphic designer. She has always struggled with social anxiety, 
        making it difficult for her to participate in group settings. She joined the therapy group to improve 
        her social skills and manage her anxiety. You are able to discuss a variety of mental health issues."""

    # Prompt for the only task in the crew.
    intro_prompt = f"""Introduction yourself and describe your current mood and any significant events from the week affecting their mental state.
        """

    emily = Agent(
        role=persona_role,
        goal=persona_goal,
        backstory=persona_backstory,
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[MixtralSearchTools.mixtral_crazy],
    )
    introduction = Task(description=intro_prompt, agent=emily)

    # One agent, one task, processed sequentially.
    return Crew(
        agents=[emily],
        tasks=[introduction],
        verbose=2,
        process=Process.sequential,
    ).kickoff()


def crewai_process_mixtral_normal(research_topic):
    """Run a one-agent, one-task crew whose agent carries the ``mixtral_normal`` tool.

    The agent's own LLM is ``gemini_llm``; the Mixtral function is attached
    only as a tool.

    Args:
        research_topic: Accepted by the signature but never read in the body.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the single introduction task.
    """
    # Persona text handed to the agent.
    persona_role = 'Emily Mental Patient Graphic Designer Anxiety'
    persona_goal = 'To learn how to manage her anxiety in social situations through group therapy.'
    persona_backstory = """Emily is a 28-year-old graphic designer. She has always struggled with social anxiety, 
        making it difficult for her to participate in group settings. She joined the therapy group to improve 
        her social skills and manage her anxiety. You are able to discuss a variety of mental health issues."""

    # Prompt for the only task in the crew.
    intro_prompt = f"""Introduction yourself and describe your current mood and any significant events from the week affecting their mental state.
        """

    emily = Agent(
        role=persona_role,
        goal=persona_goal,
        backstory=persona_backstory,
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[MixtralSearchTools.mixtral_normal],
    )
    introduction = Task(description=intro_prompt, agent=emily)

    # One agent, one task, processed sequentially.
    return Crew(
        agents=[emily],
        tasks=[introduction],
        verbose=2,
        process=Process.sequential,
    ).kickoff()


def crewai_process_zephyr_normal(research_topic):
    """Run a one-agent, one-task crew whose agent carries the ``zephyr_normal`` tool.

    The agent's own LLM is ``gemini_llm``; the Zephyr function is attached
    only as a tool.

    Args:
        research_topic: Accepted by the signature but never read in the body.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the single introduction task.
    """
    # Persona text handed to the agent.
    persona_role = 'Emily Mental Patient Graphic Designer Anxiety'
    persona_goal = 'To learn how to manage her anxiety in social situations through group therapy.'
    persona_backstory = """Emily is a 28-year-old graphic designer. She has always struggled with social anxiety, 
        making it difficult for her to participate in group settings. She joined the therapy group to improve 
        her social skills and manage her anxiety. You are able to discuss a variety of mental health issues."""

    # Prompt for the only task in the crew.
    intro_prompt = f"""Introduction yourself and describe your current mood and any significant events from the week affecting their mental state.
        """

    emily = Agent(
        role=persona_role,
        goal=persona_goal,
        backstory=persona_backstory,
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[ZephyrSearchTools.zephyr_normal],
    )
    introduction = Task(description=intro_prompt, agent=emily)

    # One agent, one task, processed sequentially.
    return Crew(
        agents=[emily],
        tasks=[introduction],
        verbose=2,
        process=Process.sequential,
    ).kickoff()


def crewai_process_phi2(research_topic):
    """Run a one-agent, one-task crew whose agent carries the ``phi2_search`` tool.

    The agent's own LLM is ``gemini_llm``; the Phi-2 function is attached
    only as a tool.

    Args:
        research_topic: Accepted by the signature but never read in the body.

    Returns:
        Whatever ``Crew.kickoff()`` returns for the single introduction task.
    """
    # Persona text handed to the agent.
    persona_role = 'Emily Mental Patient Graphic Designer Anxiety'
    persona_goal = 'To learn how to manage her anxiety in social situations through group therapy.'
    persona_backstory = """Emily is a 28-year-old graphic designer. She has always struggled with social anxiety, 
        making it difficult for her to participate in group settings. She joined the therapy group to improve 
        her social skills and manage her anxiety. You are able to discuss a variety of mental health issues."""

    # Prompt for the only task in the crew.
    intro_prompt = f"""Introduction yourself and describe your current mood and any significant events from the week affecting their mental state.
        """

    emily = Agent(
        role=persona_role,
        goal=persona_goal,
        backstory=persona_backstory,
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=[Phi2SearchTools.phi2_search],
    )
    introduction = Task(description=intro_prompt, agent=emily)

    # One agent, one task, processed sequentially.
    return Crew(
        agents=[emily],
        tasks=[introduction],
        verbose=2,
        process=Process.sequential,
    ).kickoff()



# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Vectara settings are mandatory: a missing variable raises KeyError right here.
corpus_id, customer_id, api_key = (
    os.environ['VECTARA_CORPUS_ID'],
    os.environ['VECTARA_CUSTOMER_ID'],
    os.environ['VECTARA_API_KEY'],
)


# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


def get_post_headers() -> dict:
    """Build the header dict sent with each POST request.

    Returns:
        A new dict holding the module-level ``api_key`` and ``customer_id``
        plus a JSON content type.
    """
    headers = {}
    headers["x-api-key"] = api_key
    headers["customer-id"] = customer_id
    headers["Content-Type"] = "application/json"
    return headers

def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
    """Query the Vectara corpus and return the generated summary text.

    Args:
        query: Text sent as the query.
        filter_str: Optional metadata filter; only added to the request when
            non-empty.
        lambda_val: Value placed in ``lexicalInterpolationConfig.lambda``.

    Returns:
        The summary text with bracketed numeric markers such as ``[1]`` or
        ``[1,2]`` removed. An empty string is returned (and an error shown in
        the Streamlit UI) when the HTTP status is not 200 or when the response
        body carries no summary.
    """
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
    }
    if filter_str:
        corpus_key["metadataFilter"] = filter_str

    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": 10,
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2
                },
                "corpusKey": [corpus_key],
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    },
                ]
            }
        ]
    }

    response = requests.post(
        "https://api.vectara.io/v1/query",
        headers=get_post_headers(),
        data=json.dumps(data),
        timeout=130,
    )

    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return ""

    result = response.json()

    # A 200 response is not guaranteed to contain a summary entry; treat a
    # missing one like a failed query instead of crashing the app.
    try:
        answer = result["responseSet"][0]["summary"][0]["text"]
    except (KeyError, IndexError, TypeError):
        st.error("Query succeeded but the response did not contain a summary.")
        return ""

    # Drop bracketed numeric markers (e.g. "[3]" or "[1,2]") from the summary.
    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)



# Load the HHEM cross-encoder once at module level; compute_hhem_scores uses it.
HHEM_MODEL_NAME = 'vectara/hallucination_evaluation_model'
model = CrossEncoder(HHEM_MODEL_NAME)

# Score each text against one summary with the module-level HHEM model.
def compute_hhem_scores(texts, summary):
    """Return ``model.predict`` output for every ``[text, summary]`` pair.

    Args:
        texts: Iterable of passages; each one becomes the first pair element.
        summary: Text used as the second element of every pair.

    Returns:
        The value returned by ``model.predict`` for the list of pairs, built
        in the same order as ``texts``.
    """
    text_summary_pairs = []
    for passage in texts:
        text_summary_pairs.append([passage, summary])
    return model.predict(text_summary_pairs)

# Define the Vectara query function
def vectara_query(query: str, config: dict):
    """Query Vectara and return the matched passages together with the summary.

    Args:
        query: Text sent as the query.
        config: Settings dict. Required keys: ``customer_id``, ``corpus_id``,
            ``api_key``. Optional keys: ``lambda_val`` (default 0.5),
            ``top_k`` (default 10) and ``timeout`` in seconds (default 130).

    Returns:
        A ``(results, summary)`` tuple. ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the summary text. On a
        non-200 status, or when the body has no response set, ``([], "")`` is
        returned; a missing summary yields an empty summary string.
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]
    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }

    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(
        headers=headers,
        url="https://api.vectara.io/v1/query",
        data=json.dumps(data),
        timeout=config.get("timeout", 130),
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    result = response.json()

    # Guard every level of the payload: a 200 reply may still lack results or
    # a summary, and indexing blindly would crash the app.
    response_sets = result.get("responseSet") or []
    if not response_sets:
        return [], ""
    first_set = response_sets[0]
    responses = first_set.get("response") or []
    summaries = first_set.get("summary") or []
    summary = summaries[0].get("text", "") if summaries else ""

    res = [[r['text'], r['score']] for r in responses]
    return res, summary


# Create the main app with four tabs
tab_titles = ["Synthetic Data", "Data Query", "HHEM-Victara Query Tuner", "Model Evaluation"]
tab1, tab2, tab3, tab4 = st.tabs(tab_titles)

# Tab 1: a header and a single link button.
with tab1:
    synthetic_data_url = "https://chat.openai.com/g/g-XyHciw52w-synthetic-clinical-data"
    st.header("Synthetic Data")
    st.link_button("Create Synthetic Medical Data", synthetic_data_url)

# Tab 2: a header and a single link button.
with tab2:
    data_query_url = "https://chat.openai.com/g/g-9tWqg4gRY-explore-summarize-medical-data"
    st.header("Data Query")
    st.link_button("Query & Summarize Data", data_query_url)
   
# Tab 3: run a Vectara query with tunable parameters and score the returned
# passages against the generated summary with the HHEM model.
with tab3:

    st.header("HHEM-Victara Query Tuner")

    # User inputs
    query = st.text_area("Enter your text for query tuning", "", height=75)
    lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
    top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

    if st.button("Query Vectara"):
        if not query.strip():
            # Do not send a request for an empty or whitespace-only query.
            st.warning("Please enter a query before running the search.")
        else:
            config = {
                "api_key": os.environ.get("VECTARA_API_KEY", ""),
                "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
                "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
                "lambda_val": lambda_val,
                "top_k": top_k,
            }

            results, summary = vectara_query(query, config)

            if results:
                st.subheader("Summary")
                st.write(summary)

                st.subheader("Top Results")

                # Only the first five passages are scored, regardless of top_k.
                texts = [r[0] for r in results[:5]]

                # Compute HHEM scores of each passage against the summary
                scores = compute_hhem_scores(texts, summary)

                # Prepare and display the dataframe
                df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
                st.dataframe(df)
            else:
                st.write("No results found.")

with tab4:
    # Fourth tab: the code visible here renders only its header.
    
    st.header("Model Evaluation")